// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * libata-eh.c - libata error handling
 *
 * Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 * libata documentation is available via 'make {ps|pdf}docs',
 * as Documentation/driver-api/libata.rst
 *
 * Hardware documentation available from http://www.t13.org/ and
 * http://www.sata-io.org/
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_dbg.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include <trace/events/libata.h>
#include "libata.h"

enum {
	/* speed down verdicts */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),
	ATA_EFLAG_OLD_ER		= (1 << 31),

	/* error categories */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	ATA_EH_CMD_DFL_TIMEOUT		= 5000,

	/* always put at least this amount of time between resets */
	ATA_EH_RESET_COOL_DOWN		= 5000,

	/* Waiting in ->prereset can never be reliable. It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting. Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,
	ATA_EH_FASTDRAIN_INTERVAL	= 3000,

	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000,	/* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};

/* The following table determines how we sequence resets. Each entry
 * represents timeout for that try. The first try can be soft or
 * hardreset. All others are hardreset if available. In most cases
 * the first reset w/ 10sec timeout should succeed. Following entries
 * are mostly for error handling, hotplug and those outlier devices that
 * take an exceptionally long time to recover from reset.
 */
static const unsigned int ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for outlier devices */
	5000,	/* and sweet one last chance */
	UINT_MAX, /* > 1 min has elapsed, give up */
};

static const unsigned int ata_eh_identify_timeouts[] = {
	5000,	/* covers > 99% of successes and not too boring on failures */
	10000,	/* combined time till here is enough even for media access */
	30000,	/* for true idiots */
	UINT_MAX,
};

static const unsigned int ata_eh_revalidate_timeouts[] = {
	15000,	/* Some drives are slow to read log pages when waking-up */
	15000,	/* combined time till here is enough even for media access */
	UINT_MAX,
};

static const unsigned int ata_eh_flush_timeouts[] = {
	15000,	/* be generous with flush */
	15000,	/* ditto */
	30000,	/* and even more generous */
	UINT_MAX,
};

static const unsigned int ata_eh_other_timeouts[] = {
	5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	UINT_MAX,
};

struct ata_eh_cmd_timeout_ent {
	const u8		*commands;
	const unsigned int	*timeouts;
};

/* The following table determines timeouts to use for EH internal
 * commands. Each table entry is a command class and matches the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used. If the table doesn't contain further entries,
 * the last value is used.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT),
	  .timeouts = ata_eh_revalidate_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
	  .timeouts = ata_eh_flush_timeouts },
	{ .commands = CMDS(ATA_CMD_VERIFY),
	  .timeouts = ata_eh_reset_timeouts },
};
#undef CMDS
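
/*
 * Illustrative note (editor's sketch, not from the original source):
 * the table above is consulted by ata_lookup_timeout_table() below.
 * For example, ATA_CMD_FLUSH matches the flush class and escalates
 * through ata_eh_flush_timeouts (15s, 15s, 30s), while a command with
 * no entry falls back to ATA_EH_CMD_DFL_TIMEOUT (5s).
 */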

static void __ata_port_freeze(struct ata_port *ap);
static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
			  struct ata_device **r_failed_dev);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */

static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi,
						const char *fmt, va_list args)
{
	ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
				    ATA_EH_DESC_LEN - ehi->desc_len,
				    fmt, args);
}

/**
 * __ata_ehi_push_desc - push error description without adding separator
 * @ehi: target EHI
 * @fmt: printf format string
 *
 * Format string according to @fmt and append it to @ehi->desc.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(__ata_ehi_push_desc);

/**
 * ata_ehi_push_desc - push error description with separator
 * @ehi: target EHI
 * @fmt: printf format string
 *
 * Format string according to @fmt and append it to @ehi->desc.
 * If @ehi->desc is not empty, ", " is added in-between.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	if (ehi->desc_len)
		__ata_ehi_push_desc(ehi, ", ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(ata_ehi_push_desc);
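
/*
 * Usage sketch (illustrative, not from the original source; "irq_stat"
 * and "serror" are made-up locals): a driver builds a one-line error
 * description piece by piece while holding the host lock:
 *
 *	ata_ehi_clear_desc(ehi);
 *	ata_ehi_push_desc(ehi, "irq_stat 0x%08x", irq_stat);
 *	ata_ehi_push_desc(ehi, "SErr 0x%08x", serror);
 *	// ehi->desc now reads "irq_stat 0x..., SErr 0x..."
 */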

/**
 * ata_ehi_clear_desc - clean error description
 * @ehi: target EHI
 *
 * Clear @ehi->desc.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_ehi_clear_desc(struct ata_eh_info *ehi)
{
	ehi->desc[0] = '\0';
	ehi->desc_len = 0;
}
EXPORT_SYMBOL_GPL(ata_ehi_clear_desc);

/**
 * ata_port_desc - append port description
 * @ap: target ATA port
 * @fmt: printf format string
 *
 * Format string according to @fmt and append it to port
 * description. If port description is not empty, " " is added
 * in-between. This function is to be used while initializing
 * ata_host. The description is printed on host registration.
 *
 * LOCKING:
 * None.
 */
void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
{
	va_list args;

	WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));

	if (ap->link.eh_info.desc_len)
		__ata_ehi_push_desc(&ap->link.eh_info, " ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(ata_port_desc);

#ifdef CONFIG_PCI
/**
 * ata_port_pbar_desc - append PCI BAR description
 * @ap: target ATA port
 * @bar: target PCI BAR
 * @offset: offset into PCI BAR
 * @name: name of the area
 *
 * If @offset is negative, this function formats a string which
 * contains the name, address, size and type of the BAR and
 * appends it to the port description. If @offset is zero or
 * positive, only name and offsetted address is appended.
 *
 * LOCKING:
 * None.
 */
void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
			const char *name)
{
	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
	char *type = "";
	unsigned long long start, len;

	if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
		type = "m";
	else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
		type = "i";

	start = (unsigned long long)pci_resource_start(pdev, bar);
	len = (unsigned long long)pci_resource_len(pdev, bar);

	if (offset < 0)
		ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
	else
		ata_port_desc(ap, "%s 0x%llx", name,
				start + (unsigned long long)offset);
}
EXPORT_SYMBOL_GPL(ata_port_pbar_desc);
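
/*
 * Example output (illustrative values): for a 512-byte MMIO BAR at
 * 0xfe800000, ata_port_pbar_desc(ap, 5, -1, "abar") appends
 * "abar m512@0xfe800000", while the same call with @offset = 0x100
 * appends "abar 0xfe800100" instead.
 */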
#endif /* CONFIG_PCI */

static int ata_lookup_timeout_table(u8 cmd)
{
	int i;

	for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
		const u8 *cur;

		for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
			if (*cur == cmd)
				return i;
	}

	return -1;
}

/**
 * ata_internal_cmd_timeout - determine timeout for an internal command
 * @dev: target device
 * @cmd: internal command to be issued
 *
 * Determine timeout for internal command @cmd for @dev.
 *
 * LOCKING:
 * EH context.
 *
 * RETURNS:
 * Determined timeout.
 */
unsigned int ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return ATA_EH_CMD_DFL_TIMEOUT;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	return ata_eh_cmd_timeout_table[ent].timeouts[idx];
}

/**
 * ata_internal_cmd_timed_out - notification for internal command timeout
 * @dev: target device
 * @cmd: internal command which timed out
 *
 * Notify EH that internal command @cmd for @dev timed out. This
 * function should be called only for commands whose timeouts are
 * determined using ata_internal_cmd_timeout().
 *
 * LOCKING:
 * EH context.
 */
void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != UINT_MAX)
		ehc->cmd_timeout_idx[dev->devno][ent]++;
}
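
/*
 * How the two helpers above pair up in practice (illustrative sketch,
 * not from the original source):
 *
 *	unsigned int t;
 *
 *	t = ata_internal_cmd_timeout(dev, ATA_CMD_ID_ATA);	// 5000 ms
 *	// ... IDENTIFY times out ...
 *	ata_internal_cmd_timed_out(dev, ATA_CMD_ID_ATA);
 *	t = ata_internal_cmd_timeout(dev, ATA_CMD_ID_ATA);	// 10000 ms
 *
 * The index saturates at the last finite entry, so once a class has
 * escalated to its longest timeout, further retries keep reusing it.
 */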

static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
			     unsigned int err_mask)
{
	struct ata_ering_entry *ent;

	WARN_ON(!err_mask);

	ering->cursor++;
	ering->cursor %= ATA_ERING_SIZE;

	ent = &ering->ring[ering->cursor];
	ent->eflags = eflags;
	ent->err_mask = err_mask;
	ent->timestamp = get_jiffies_64();
}

static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
{
	struct ata_ering_entry *ent = &ering->ring[ering->cursor];

	if (ent->err_mask)
		return ent;
	return NULL;
}

int ata_ering_map(struct ata_ering *ering,
		  int (*map_fn)(struct ata_ering_entry *, void *),
		  void *arg)
{
	int idx, rc = 0;
	struct ata_ering_entry *ent;

	idx = ering->cursor;
	do {
		ent = &ering->ring[idx];
		if (!ent->err_mask)
			break;
		rc = map_fn(ent, arg);
		if (rc)
			break;
		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
	} while (idx != ering->cursor);

	return rc;
}

static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg)
{
	ent->eflags |= ATA_EFLAG_OLD_ER;
	return 0;
}

static void ata_ering_clear(struct ata_ering *ering)
{
	ata_ering_map(ering, ata_ering_clear_cb, NULL);
}

static unsigned int ata_eh_dev_action(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;

	return ehc->i.action | ehc->i.dev_action[dev->devno];
}

static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
				struct ata_eh_info *ehi, unsigned int action)
{
	struct ata_device *tdev;

	if (!dev) {
		ehi->action &= ~action;
		ata_for_each_dev(tdev, link, ALL)
			ehi->dev_action[tdev->devno] &= ~action;
	} else {
		/* doesn't make sense for port-wide EH actions */
		WARN_ON(!(action & ATA_EH_PERDEV_MASK));

		/* break ehi->action into ehi->dev_action */
		if (ehi->action & action) {
			ata_for_each_dev(tdev, link, ALL)
				ehi->dev_action[tdev->devno] |=
					ehi->action & action;
			ehi->action &= ~action;
		}

		/* turn off the specified per-dev action */
		ehi->dev_action[dev->devno] &= ~action;
	}
}

/**
 * ata_eh_acquire - acquire EH ownership
 * @ap: ATA port to acquire EH ownership for
 *
 * Acquire EH ownership for @ap. This is the basic exclusion
 * mechanism for ports sharing a host. Only one port hanging off
 * the same host can claim the ownership of EH.
 *
 * LOCKING:
 * EH context.
 */
void ata_eh_acquire(struct ata_port *ap)
{
	mutex_lock(&ap->host->eh_mutex);
	WARN_ON_ONCE(ap->host->eh_owner);
	ap->host->eh_owner = current;
}

/**
 * ata_eh_release - release EH ownership
 * @ap: ATA port to release EH ownership for
 *
 * Release EH ownership for @ap. The caller must have acquired
 * EH ownership using ata_eh_acquire() previously.
 *
 * LOCKING:
 * EH context.
 */
void ata_eh_release(struct ata_port *ap)
{
	WARN_ON_ONCE(ap->host->eh_owner != current);
	ap->host->eh_owner = NULL;
	mutex_unlock(&ap->host->eh_mutex);
}
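
/*
 * Ownership sketch (illustrative): any code doing EH work outside of
 * ata_scsi_port_error_handler() brackets it with the pair above, e.g.:
 *
 *	ata_eh_acquire(ap);
 *	// ... reset and revalidate the link ...
 *	ata_eh_release(ap);
 *
 * Long sleeps inside EH (e.g. ata_msleep()) may drop and re-take the
 * ownership so that sibling ports on the same host can make progress.
 */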

static void ata_eh_dev_disable(struct ata_device *dev)
{
	ata_acpi_on_disable(dev);
	ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
	dev->class++;

	/* From now till the next successful probe, ering is used to
	 * track probe failures. Clear accumulated device error info.
	 */
	ata_ering_clear(&dev->ering);
}

static void ata_eh_unload(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/*
	 * Unless we are restarting, transition all enabled devices to
	 * standby power mode.
	 */
	if (system_state != SYSTEM_RESTART) {
		ata_for_each_link(link, ap, PMP_FIRST) {
			ata_for_each_dev(dev, link, ENABLED)
				ata_dev_power_set_standby(dev);
		}
	}

	/*
	 * Restore SControl IPM and SPD for the next driver and
	 * disable attached devices.
	 */
	ata_for_each_link(link, ap, PMP_FIRST) {
		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
		ata_for_each_dev(dev, link, ENABLED)
			ata_eh_dev_disable(dev);
	}

	/* freeze and set UNLOADED */
	spin_lock_irqsave(ap->lock, flags);

	ata_port_freeze(ap);			/* won't be thawed */
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;	/* clear pending from freeze */
	ap->pflags |= ATA_PFLAG_UNLOADED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_scsi_error - SCSI layer error handler callback
 * @host: SCSI host on which error occurred
 *
 * Handles SCSI-layer-thrown error events.
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 *
 * RETURNS:
 * Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	LIST_HEAD(eh_work_q);

	spin_lock_irqsave(host->host_lock, flags);
	list_splice_init(&host->eh_cmd_q, &eh_work_q);
	spin_unlock_irqrestore(host->host_lock, flags);

	ata_scsi_cmd_error_handler(host, ap, &eh_work_q);

	/* If the timeout raced normal completion and there is nothing to
	   recover (nr_timedout == 0), why exactly are we doing error
	   recovery? */
	ata_scsi_port_error_handler(host, ap);

	/* finish or retry handled scmd's and clean up */
	WARN_ON(!list_empty(&eh_work_q));
}

/**
 * ata_scsi_cmd_error_handler - error callback for a list of commands
 * @host: scsi host containing the port
 * @ap: ATA port within the host
 * @eh_work_q: list of commands to process
 *
 * process the given list of commands and return those finished to the
 * ap->eh_done_q. This function is the first part of the libata error
 * handler which processes a given list of failed commands.
 */
void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
				struct list_head *eh_work_q)
{
	int i;
	unsigned long flags;
	struct scsi_cmnd *scmd, *tmp;
	int nr_timedout = 0;

	/* make sure sff pio task is not running */
	ata_sff_flush_pio_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/*
	 * For EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout. When normal
	 * completion wins, the qc never reaches EH. When error
	 * completion wins, the qc has ATA_QCFLAG_EH set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point. In such cases, both types of
	 * completions are honored. A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	spin_lock_irqsave(ap->lock, flags);

	/*
	 * This must occur under the ap->lock as we don't want
	 * a polled recovery to race the real interrupt handler
	 *
	 * The lost_interrupt handler checks for any completed but
	 * non-notified command and completes much like an IRQ handler.
	 *
	 * We then fall into the error recovery code which will treat
	 * this as if normal completion won the race
	 */
	if (ap->ops->lost_interrupt)
		ap->ops->lost_interrupt(ap);

	list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
		struct ata_queued_cmd *qc;

		ata_qc_for_each_raw(ap, qc, i) {
			if (qc->flags & ATA_QCFLAG_ACTIVE &&
			    qc->scsicmd == scmd)
				break;
		}

		if (i < ATA_MAX_QUEUE) {
			/* the scmd has an associated qc */
			if (!(qc->flags & ATA_QCFLAG_EH)) {
				/* which hasn't failed yet, timeout */
				qc->err_mask |= AC_ERR_TIMEOUT;
				qc->flags |= ATA_QCFLAG_EH;
				nr_timedout++;
			}
		} else {
			/* Normal completion occurred after
			 * SCSI timeout but before this point.
			 * Successfully complete it.
			 */
			scmd->retries = scmd->allowed;
			scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
		}
	}

	/*
	 * If we have timed-out qcs, they belong to EH from
	 * this point on, but the state of the controller is
	 * unknown. Freeze the port to make sure the IRQ
	 * handler doesn't diddle with those qcs. This must
	 * be done atomically w.r.t. setting ATA_QCFLAG_EH.
	 */
	if (nr_timedout)
		__ata_port_freeze(ap);

	/* initialize eh_tries */
	ap->eh_tries = ATA_EH_MAX_TRIES;

	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL(ata_scsi_cmd_error_handler);

/**
 * ata_scsi_port_error_handler - recover the port after the commands
 * @host: SCSI host containing the port
 * @ap: the ATA port
 *
 * Handle the recovery of the port @ap after all the commands
 * have been recovered.
 */
void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
{
	unsigned long flags;
	struct ata_link *link;

	/* acquire EH ownership */
	ata_eh_acquire(ap);
repeat:
	/* kill fast drain timer */
	del_timer_sync(&ap->fastdrain_timer);

	/* process port resume request */
	ata_eh_handle_port_resume(ap);

	/* fetch & clear EH info */
	spin_lock_irqsave(ap->lock, flags);

	ata_for_each_link(link, ap, HOST_FIRST) {
		struct ata_eh_context *ehc = &link->eh_context;
		struct ata_device *dev;

		memset(&link->eh_context, 0, sizeof(link->eh_context));
		link->eh_context.i = link->eh_info;
		memset(&link->eh_info, 0, sizeof(link->eh_info));

		ata_for_each_dev(dev, link, ENABLED) {
			int devno = dev->devno;

			ehc->saved_xfer_mode[devno] = dev->xfer_mode;
			if (ata_ncq_enabled(dev))
				ehc->saved_ncq_enabled |= 1 << devno;

			/* If we are resuming, wake up the device */
			if (ap->pflags & ATA_PFLAG_RESUMING) {
				dev->flags |= ATA_DFLAG_RESUMING;
				ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
			}
		}
	}

	ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;
	ap->excl_link = NULL;	/* don't maintain exclusion over EH */

	spin_unlock_irqrestore(ap->lock, flags);

	/* invoke EH, skip if unloading or suspended */
	if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
		ap->ops->error_handler(ap);
	else {
		/* if unloading, commence suicide */
		if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
		    !(ap->pflags & ATA_PFLAG_UNLOADED))
			ata_eh_unload(ap);
		ata_eh_finish(ap);
	}

	/* process port suspend request */
	ata_eh_handle_port_suspend(ap);

	/*
	 * Exception might have happened after ->error_handler recovered the
	 * port but before this point. Repeat EH in such case.
	 */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_EH_PENDING) {
		if (--ap->eh_tries) {
			spin_unlock_irqrestore(ap->lock, flags);
			goto repeat;
		}
		ata_port_err(ap,
			     "EH pending after %d tries, giving up\n",
			     ATA_EH_MAX_TRIES);
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
	}

	/* this run is complete, make sure EH info is clear */
	ata_for_each_link(link, ap, HOST_FIRST)
		memset(&link->eh_info, 0, sizeof(link->eh_info));

	/*
	 * end eh (clear host_eh_scheduled) while holding ap->lock such that if
	 * exception occurs after this point but before EH completion, SCSI
	 * midlayer will re-initiate EH.
	 */
	ap->ops->end_eh(ap);

	spin_unlock_irqrestore(ap->lock, flags);
	ata_eh_release(ap);

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_RESUMING;

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) &&
		 !(ap->flags & ATA_FLAG_SAS_HOST))
		schedule_delayed_work(&ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_info(ap, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);

/**
 * ata_port_wait_eh - Wait for the currently pending EH to complete
 * @ap: Port to wait EH for
 *
 * Wait until the currently pending EH is complete.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		ata_msleep(ap, 10);
		goto retry;
	}
}
EXPORT_SYMBOL_GPL(ata_port_wait_eh);

static unsigned int ata_eh_nr_in_flight(struct ata_port *ap)
{
	struct ata_queued_cmd *qc;
	unsigned int tag;
	unsigned int nr = 0;

	/* count only non-internal commands */
	ata_qc_for_each(ap, qc, tag) {
		if (qc)
			nr++;
	}

	return nr;
}

void ata_eh_fastdrain_timerfn(struct timer_list *t)
{
	struct ata_port *ap = from_timer(ap, t, fastdrain_timer);
	unsigned long flags;
	unsigned int cnt;

	spin_lock_irqsave(ap->lock, flags);

	cnt = ata_eh_nr_in_flight(ap);

	/* are we done? */
	if (!cnt)
		goto out_unlock;

	if (cnt == ap->fastdrain_cnt) {
		struct ata_queued_cmd *qc;
		unsigned int tag;

		/* No progress during the last interval, tag all
		 * in-flight qcs as timed out and freeze the port.
		 */
		ata_qc_for_each(ap, qc, tag) {
			if (qc)
				qc->err_mask |= AC_ERR_TIMEOUT;
		}

		ata_port_freeze(ap);
	} else {
		/* some qcs have finished, give it another chance */
		ap->fastdrain_cnt = cnt;
		ap->fastdrain_timer.expires =
			ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
		add_timer(&ap->fastdrain_timer);
	}

out_unlock:
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
 * @ap: target ATA port
 * @fastdrain: activate fast drain
 *
 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
 * is non-zero and EH wasn't pending before. Fast drain ensures
 * that EH kicks in in a timely manner.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
{
	unsigned int cnt;

	/* already scheduled? */
	if (ap->pflags & ATA_PFLAG_EH_PENDING)
		return;

	ap->pflags |= ATA_PFLAG_EH_PENDING;

	if (!fastdrain)
		return;

	/* do we have in-flight qcs? */
	cnt = ata_eh_nr_in_flight(ap);
	if (!cnt)
		return;

	/* activate fast drain */
	ap->fastdrain_cnt = cnt;
	ap->fastdrain_timer.expires =
		ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
	add_timer(&ap->fastdrain_timer);
}
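
/*
 * Worked example of the fast-drain machinery (illustrative numbers):
 * EH is scheduled while 3 qcs are in flight, so fastdrain_cnt = 3 and
 * the timer fires ATA_EH_FASTDRAIN_INTERVAL (3s) later. If all 3 are
 * still in flight, ata_eh_fastdrain_timerfn() marks them
 * AC_ERR_TIMEOUT and freezes the port; if, say, one has completed, the
 * timer re-arms with fastdrain_cnt = 2 and waits another interval for
 * further progress.
 */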

/**
 * ata_qc_schedule_eh - schedule qc for error handling
 * @qc: command to schedule error handling for
 *
 * Schedule error handling for @qc. EH will kick in as soon as
 * other commands are drained.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;

	qc->flags |= ATA_QCFLAG_EH;
	ata_eh_set_pending(ap, 1);

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_EH is unconditionally set after
	 * this function completes.
	 */
	blk_abort_request(scsi_cmd_to_rq(qc->scsicmd));
}

/**
 * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine
 * @ap: ATA port to schedule EH for
 *
 * LOCKING: inherited from ata_port_schedule_eh
 * spin_lock_irqsave(host lock)
 */
void ata_std_sched_eh(struct ata_port *ap)
{
	if (ap->pflags & ATA_PFLAG_INITIALIZING)
		return;

	ata_eh_set_pending(ap, 1);
	scsi_schedule_eh(ap->scsi_host);

	trace_ata_std_sched_eh(ap);
}
EXPORT_SYMBOL_GPL(ata_std_sched_eh);

/**
 * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine
 * @ap: ATA port to end EH for
 *
 * In the libata object model there is a 1:1 mapping of ata_port to
 * shost, so host fields can be directly manipulated under ap->lock, in
 * the libsas case we need to hold a lock at the ha->level to coordinate
 * these events.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_std_end_eh(struct ata_port *ap)
{
	struct Scsi_Host *host = ap->scsi_host;

	host->host_eh_scheduled = 0;
}
EXPORT_SYMBOL(ata_std_end_eh);


/**
 * ata_port_schedule_eh - schedule error handling without a qc
 * @ap: ATA port to schedule EH for
 *
 * Schedule error handling for @ap. EH will kick in as soon as
 * all commands are drained.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	/* see: ata_std_sched_eh, unless you know better */
	ap->ops->sched_eh(ap);
}
EXPORT_SYMBOL_GPL(ata_port_schedule_eh);
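
/*
 * Driver-side sketch (illustrative; "irq_stat" is a made-up local): an
 * interrupt handler that detects a fatal condition typically records a
 * description, requests a reset and schedules EH under ap->lock:
 *
 *	spin_lock(ap->lock);
 *	ata_ehi_push_desc(&ap->link.eh_info, "irq_stat 0x%08x", irq_stat);
 *	ap->link.eh_info.action |= ATA_EH_RESET;
 *	ata_port_schedule_eh(ap);
 *	spin_unlock(ap->lock);
 */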

static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
{
	struct ata_queued_cmd *qc;
	int tag, nr_aborted = 0;

	/* we're gonna abort all commands, no need for fast drain */
	ata_eh_set_pending(ap, 0);

	/* include internal tag in iteration */
	ata_qc_for_each_with_internal(ap, qc, tag) {
		if (qc && (!link || qc->dev->link == link)) {
			qc->flags |= ATA_QCFLAG_EH;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 * ata_link_abort - abort all qc's on the link
 * @link: ATA link to abort qc's for
 *
 * Abort all active qc's on @link and schedule EH.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * Number of aborted qc's.
 */
int ata_link_abort(struct ata_link *link)
{
	return ata_do_link_abort(link->ap, link);
}
EXPORT_SYMBOL_GPL(ata_link_abort);

/**
 * ata_port_abort - abort all qc's on the port
 * @ap: ATA port to abort qc's for
 *
 * Abort all active qc's of @ap and schedule EH.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	return ata_do_link_abort(ap, NULL);
}
EXPORT_SYMBOL_GPL(ata_port_abort);

/**
 * __ata_port_freeze - freeze port
 * @ap: ATA port to freeze
 *
 * This function is called when HSM violation or some other
 * condition disrupts normal operation of the port. Frozen port
 * is not allowed to perform any operation until the port is
 * thawed, which usually follows a successful reset.
 *
 * ap->ops->freeze() callback can be used for freezing the port
 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a
 * port cannot be frozen hardware-wise, the interrupt handler
 * must ack and clear interrupts unconditionally while the port
 * is frozen.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	trace_ata_port_freeze(ap);
}

/**
 * ata_port_freeze - abort & freeze port
 * @ap: ATA port to freeze
 *
 * Abort and freeze @ap. The freeze operation must be called
 * first, because some hardware requires special operations
 * before the taskfile registers are accessible.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	__ata_port_freeze(ap);

	return ata_port_abort(ap);
}
EXPORT_SYMBOL_GPL(ata_port_freeze);
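
/*
 * Typical low-level-driver pattern (illustrative; "fatal_error" is a
 * made-up condition): on an unrecoverable host error, freeze first so
 * nothing touches the hardware, then let EH sort out the qcs. Called
 * under ap->lock:
 *
 *	if (fatal_error)
 *		ata_port_freeze(ap);	// __ata_port_freeze() + ata_port_abort()
 */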

/**
 * ata_eh_freeze_port - EH helper to freeze port
 * @ap: ATA port to freeze
 *
 * Freeze @ap.
 *
 * LOCKING:
 * None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(ata_eh_freeze_port);

/**
 * ata_eh_thaw_port - EH helper to thaw port
 * @ap: ATA port to thaw
 *
 * Thaw frozen port @ap.
 *
 * LOCKING:
 * None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	trace_ata_port_thaw(ap);
}

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 * ata_eh_qc_complete - Complete an active ATA command from EH
 * @qc: Command to complete
 *
 * Indicate to the mid and upper layers that an ATA command has
 * completed. To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;

	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 * @qc: Command to retry
 *
 * Indicate to the mid and upper layers that an ATA command
 * should be retried. To be used from EH.
 *
 * SCSI midlayer limits the number of retries to scmd->allowed.
 * scmd->allowed is incremented for commands which get retried
 * due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;

	if (!qc->err_mask)
		scmd->allowed++;
	__ata_eh_qc_complete(qc);
}

/**
 * ata_dev_disable - disable ATA device
 * @dev: ATA device to disable
 *
 * Disable @dev.
 *
 * LOCKING:
 * EH context.
 */
void ata_dev_disable(struct ata_device *dev)
{
	if (!ata_dev_enabled(dev))
		return;

	ata_dev_warn(dev, "disable device\n");

	ata_eh_dev_disable(dev);
}
EXPORT_SYMBOL_GPL(ata_dev_disable);

/**
 * ata_eh_detach_dev - detach ATA device
 * @dev: ATA device to detach
 *
 * Detach @dev.
 *
 * LOCKING:
 * None.
 */
void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_link *link = dev->link;
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	/*
	 * If the device is still enabled, transition it to standby power mode
	 * (i.e. spin down HDDs) and disable it.
	 */
	if (ata_dev_enabled(dev)) {
		ata_dev_power_set_standby(dev);
		ata_eh_dev_disable(dev);
	}

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH info */
	ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_about_to_do - about to perform eh_action
 * @link: target ATA link
 * @dev: target ATA dev for per-dev action (can be NULL)
 * @action: action about to be performed
 *
 * Called just before performing EH actions to clear related bits
 * in @link->eh_info such that eh actions are not unnecessarily
 * repeated.
 *
 * LOCKING:
 * None.
 */
void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
			unsigned int action)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_info *ehi = &link->eh_info;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	trace_ata_eh_about_to_do(link, dev ? dev->devno : 0, action);

	spin_lock_irqsave(ap->lock, flags);

	ata_eh_clear_action(link, dev, ehi, action);

	/* About to take EH action, set RECOVERED. Ignore actions on
	 * slave links as master will do them again.
	 */
	if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_done - EH action complete
 * @link: ATA link for which EH actions are complete
 * @dev: target ATA dev for per-dev action (can be NULL)
 * @action: action just completed
 *
 * Called right after performing EH actions to clear related bits
 * in @link->eh_context.
 *
 * LOCKING:
 * None.
 */
void ata_eh_done(struct ata_link *link, struct ata_device *dev,
		 unsigned int action)
{
	struct ata_eh_context *ehc = &link->eh_context;

	trace_ata_eh_done(link, dev ? dev->devno : 0, action);

	ata_eh_clear_action(link, dev, &ehc->i, action);
}

/**
 * ata_err_string - convert err_mask to descriptive string
 * @err_mask: error mask to convert to string
 *
 * Convert @err_mask to descriptive string. Errors are
 * prioritized according to severity and only the most severe
 * error is reported.
 *
 * LOCKING:
 * None.
 *
 * RETURNS:
 * Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	if (err_mask & AC_ERR_NCQ)
		return "NCQ error";
	if (err_mask & AC_ERR_NODEV_HINT)
		return "Polling detection error";
	return "unknown error";
}

/**
 * atapi_eh_tur - perform ATAPI TEST_UNIT_READY
 * @dev: target ATAPI device
 * @r_sense_key: out parameter for sense_key
 *
 * Perform ATAPI TEST_UNIT_READY.
 *
 * LOCKING:
 * EH context (may sleep).
 *
 * RETURNS:
 * 0 on success, AC_ERR_* mask on failure.
 */
unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
	struct ata_taskfile tf;
	unsigned int err_mask;

	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;
	tf.protocol = ATAPI_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
	if (err_mask == AC_ERR_DEV)
		*r_sense_key = tf.error >> 4;
	return err_mask;
}

/**
 * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT
 * @qc: qc to perform REQUEST_SENSE_DATA_EXT to
 *
 * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK
 * SENSE. This function is an EH helper.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * true if sense data could be fetched, false otherwise.
 */
static bool ata_eh_request_sense(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *cmd = qc->scsicmd;
	struct ata_device *dev = qc->dev;
	struct ata_taskfile tf;
	unsigned int err_mask;

	if (ata_port_is_frozen(qc->ap)) {
		ata_dev_warn(dev, "sense data available but port frozen\n");
		return false;
	}

	if (!ata_id_sense_reporting_enabled(dev->id)) {
		ata_dev_warn(qc->dev, "sense data reporting disabled\n");
		return false;
	}

	ata_tf_init(dev, &tf);
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	tf.command = ATA_CMD_REQ_SENSE_DATA;
	tf.protocol = ATA_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
	/* Ignore err_mask; ATA_ERR might be set */
	if (tf.status & ATA_SENSE) {
		if (ata_scsi_sense_is_valid(tf.lbah, tf.lbam, tf.lbal)) {
			/* Set sense without also setting scsicmd->result */
			scsi_build_sense_buffer(dev->flags & ATA_DFLAG_D_SENSE,
						cmd->sense_buffer, tf.lbah,
						tf.lbam, tf.lbal);
			qc->flags |= ATA_QCFLAG_SENSE_VALID;
			return true;
		}
	} else {
		ata_dev_warn(dev, "request sense failed stat %02x emask %x\n",
			     tf.status, err_mask);
	}

	return false;
}

/**
 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 * @dev: device to perform REQUEST_SENSE to
 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 * @dfl_sense_key: default sense key to use
 *
 * Perform ATAPI REQUEST_SENSE after the device reported CHECK
 * SENSE. This function is EH helper.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, AC_ERR_* mask on failure
 */
unsigned int atapi_eh_request_sense(struct ata_device *dev,
				    u8 *sense_buf, u8 dfl_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] =
		{ REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
	struct ata_port *ap = dev->link->ap;
	struct ata_taskfile tf;

	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* initialize sense_buf with the error register,
	 * for the case where they are -not- overwritten
	 */
	sense_buf[0] = 0x70;
	sense_buf[2] = dfl_sense_key;

	/* some devices time out if garbage left in tf */
	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATAPI_PROT_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATAPI_PROT_PIO;
		tf.lbam = SCSI_SENSE_BUFFERSIZE;
		tf.lbah = 0;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
}

/**
 * ata_eh_analyze_serror - analyze SError for a failed port
 * @link: ATA link to analyze SError for
 *
 * Analyze SError if available and further determine cause of
 * failure.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_analyze_serror(struct ata_link *link)
{
	struct ata_eh_context *ehc = &link->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;
	u32 hotplug_mask;

	if (serror & (SERR_PERSISTENT | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_RESET;
	}

	/* Determine whether a hotplug event has occurred. Both
	 * SError.N/X are considered hotplug events for enabled or
	 * host links. For disabled PMP links, only N bit is
	 * considered as X bit is left at 1 for link plugging.
	 */
	if (link->lpm_policy > ATA_LPM_MAX_POWER)
		hotplug_mask = 0;	/* hotplug doesn't work w/ LPM */
	else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
		hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
	else
		hotplug_mask = SERR_PHYRDY_CHG;

	if (serror & hotplug_mask)
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}

/**
 * ata_eh_analyze_tf - analyze taskfile of a failed qc
 * @qc: qc to analyze
 *
 * Analyze taskfile of @qc and further determine cause of
 * failure. This function also requests ATAPI sense data if
 * available.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc)
{
	const struct ata_taskfile *tf = &qc->result_tf;
	unsigned int tmp, action = 0;
	u8 stat = tf->status, err = tf->error;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_RESET;
	}

	if (stat & (ATA_ERR | ATA_DF)) {
		qc->err_mask |= AC_ERR_DEV;
		/*
		 * Sense data reporting does not work if the
		 * device fault bit is set.
		 */
		if (stat & ATA_DF)
			stat &= ~ATA_SENSE;
	} else {
		return 0;
	}

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
	case ATA_DEV_ZAC:
		/*
		 * Fetch the sense data explicitly if:
		 * - It was a non-NCQ command that failed, or
		 * - It was a NCQ command that failed, but the sense data
		 *   was not included in the NCQ command error log
		 *   (i.e. NCQ autosense is not supported by the device).
		 */
		if (!(qc->flags & ATA_QCFLAG_SENSE_VALID) &&
		    (stat & ATA_SENSE) && ata_eh_request_sense(qc))
			set_status_byte(qc->scsicmd, SAM_STAT_CHECK_CONDITION);
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & (ATA_UNC | ATA_AMNF))
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!ata_port_is_frozen(qc->ap)) {
			tmp = atapi_eh_request_sense(qc->dev,
						     qc->scsicmd->sense_buffer,
						     qc->result_tf.error >> 4);
			if (!tmp)
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			else
				qc->err_mask |= tmp;
		}
	}

	if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
		enum scsi_disposition ret = scsi_check_sense(qc->scsicmd);
		/*
		 * SUCCESS here means that the sense code could be
		 * evaluated and should be passed to the upper layers
		 * for correct evaluation.
		 * FAILED means the sense code could not be interpreted
		 * and the device would need to be reset.
		 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the
		 * command would need to be retried.
		 */
		if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) {
			qc->flags |= ATA_QCFLAG_RETRY;
			qc->err_mask |= AC_ERR_OTHER;
		} else if (ret != SUCCESS) {
			qc->err_mask |= AC_ERR_HSM;
		}
	}
	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_RESET;

	return action;
}

static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
				   int *xfer_ok)
{
	int base = 0;

	if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
		*xfer_ok = 1;

	if (!*xfer_ok)
		base = ATA_ECAT_DUBIOUS_NONE;

	if (err_mask & AC_ERR_ATA_BUS)
		return base + ATA_ECAT_ATA_BUS;

	if (err_mask & AC_ERR_TIMEOUT)
		return base + ATA_ECAT_TOUT_HSM;

	if (eflags & ATA_EFLAG_IS_IO) {
		if (err_mask & AC_ERR_HSM)
			return base + ATA_ECAT_TOUT_HSM;
		if ((err_mask &
		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
			return base + ATA_ECAT_UNK_DEV;
	}

	return 0;
}
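
/*
 * Examples of the mapping above (illustrative): an I/O command failing
 * with AC_ERR_TIMEOUT after transfers have been verified lands in
 * ATA_ECAT_TOUT_HSM; the same failure while ATA_EFLAG_DUBIOUS_XFER is
 * still in effect lands in ATA_ECAT_DUBIOUS_TOUT_HSM; a bare AC_ERR_DEV
 * on I/O maps to ATA_ECAT_UNK_DEV; anything unrecognized falls back to
 * ATA_ECAT_NONE (0).
 */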

struct speed_down_verdict_arg {
	u64 since;
	int xfer_ok;
	int nr_errors[ATA_ECAT_NR];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_verdict_arg *arg = void_arg;
	int cat;

	if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since))
		return -1;

	cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
				      &arg->xfer_ok);
	arg->nr_errors[cat]++;

	return 0;
}

/**
 * ata_eh_speed_down_verdict - Determine speed down verdict
 * @dev: Device of interest
 *
 * This function examines error ring of @dev and determines
 * whether NCQ needs to be turned off, transfer speed should be
 * stepped down, or falling back to PIO is necessary.
 *
 * ECAT_ATA_BUS : ATA_BUS error for any command
 *
 * ECAT_TOUT_HSM : TIMEOUT for any command or HSM violation for
 *                 IO commands
 *
 * ECAT_UNK_DEV : Unknown DEV error for IO commands
 *
 * ECAT_DUBIOUS_* : Identical to above three but occurred while
 *                  data transfer hasn't been verified.
 *
 * Verdicts are
 *
 * NCQ_OFF : Turn off NCQ.
 *
 * SPEED_DOWN : Speed down transfer speed but don't fall back
 *              to PIO.
 *
 * FALLBACK_TO_PIO : Fall back to PIO.
 *
 * Even if multiple verdicts are returned, only one action is
 * taken per error. An action triggered by non-DUBIOUS errors
 * clears ering, while one triggered by DUBIOUS_* errors doesn't.
 * This is to expedite speed down decisions right after device is
 * initially configured.
 *
 * The following are speed down rules. #1 and #2 deal with
 * DUBIOUS errors.
 *
 * 1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *    occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 * 2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *    occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	occurred during last 5 mins, FALLBACK_TO_PIO.
1746 | * |
1747 | * 4. If more than 3 TOUT_HSM or UNK_DEV errors occurred |
1748 | * during last 10 mins, NCQ_OFF. |
1749 | * |
1750 | * 5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6 |
1751 | * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN. |
1752 | * |
1753 | * LOCKING: |
1754 | * Inherited from caller. |
1755 | * |
1756 | * RETURNS: |
1757 | * OR of ATA_EH_SPDN_* flags. |
1758 | */ |
1759 | static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) |
1760 | { |
1761 | const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; |
1762 | u64 j64 = get_jiffies_64(); |
1763 | struct speed_down_verdict_arg arg; |
1764 | unsigned int verdict = 0; |
1765 | |
1766 | /* scan past 5 mins of error history */ |
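	/* (arg.since is clamped at 0 so early-boot jiffies can't underflow) */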
1767 | memset(&arg, 0, sizeof(arg)); |
1768 | arg.since = j64 - min(j64, j5mins); |
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1770 | |
1771 | if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] + |
1772 | arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1) |
1773 | verdict |= ATA_EH_SPDN_SPEED_DOWN | |
1774 | ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS; |
1775 | |
1776 | if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] + |
1777 | arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1) |
1778 | verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS; |
1779 | |
1780 | if (arg.nr_errors[ATA_ECAT_ATA_BUS] + |
1781 | arg.nr_errors[ATA_ECAT_TOUT_HSM] + |
1782 | arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) |
1783 | verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; |
1784 | |
1785 | /* scan past 10 mins of error history */ |
1786 | memset(&arg, 0, sizeof(arg)); |
1787 | arg.since = j64 - min(j64, j10mins); |
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1789 | |
1790 | if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + |
1791 | arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) |
1792 | verdict |= ATA_EH_SPDN_NCQ_OFF; |
1793 | |
1794 | if (arg.nr_errors[ATA_ECAT_ATA_BUS] + |
1795 | arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || |
1796 | arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) |
1797 | verdict |= ATA_EH_SPDN_SPEED_DOWN; |
1798 | |
1799 | return verdict; |
1800 | } |
1801 | |
1802 | /** |
1803 | * ata_eh_speed_down - record error and speed down if necessary |
1804 | * @dev: Failed device |
1805 | * @eflags: mask of ATA_EFLAG_* flags |
1806 | * @err_mask: err_mask of the error |
1807 | * |
1808 | * Record error and examine error history to determine whether |
1809 | * adjusting transmission speed is necessary. It also sets |
1810 | * transmission limits appropriately if such adjustment is |
1811 | * necessary. |
1812 | * |
1813 | * LOCKING: |
1814 | * Kernel thread context (may sleep). |
1815 | * |
1816 | * RETURNS: |
1817 | * Determined recovery action. |
1818 | */ |
1819 | static unsigned int ata_eh_speed_down(struct ata_device *dev, |
1820 | unsigned int eflags, unsigned int err_mask) |
1821 | { |
1822 | struct ata_link *link = ata_dev_phys_link(dev); |
1823 | int xfer_ok = 0; |
1824 | unsigned int verdict; |
1825 | unsigned int action = 0; |
1826 | |
1827 | /* don't bother if Cat-0 error */ |
	if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
1829 | return 0; |
1830 | |
1831 | /* record error and determine whether speed down is necessary */ |
	ata_ering_record(&dev->ering, eflags, err_mask);
1833 | verdict = ata_eh_speed_down_verdict(dev); |
1834 | |
1835 | /* turn off NCQ? */ |
1836 | if ((verdict & ATA_EH_SPDN_NCQ_OFF) && ata_ncq_enabled(dev)) { |
1837 | dev->flags |= ATA_DFLAG_NCQ_OFF; |
1838 | ata_dev_warn(dev, "NCQ disabled due to excessive errors\n" ); |
1839 | goto done; |
1840 | } |
1841 | |
1842 | /* speed down? */ |
1843 | if (verdict & ATA_EH_SPDN_SPEED_DOWN) { |
1844 | /* speed down SATA link speed if possible */ |
		if (sata_down_spd_limit(link, 0) == 0) {
1846 | action |= ATA_EH_RESET; |
1847 | goto done; |
1848 | } |
1849 | |
1850 | /* lower transfer mode */ |
1851 | if (dev->spdn_cnt < 2) { |
1852 | static const int dma_dnxfer_sel[] = |
1853 | { ATA_DNXFER_DMA, ATA_DNXFER_40C }; |
1854 | static const int pio_dnxfer_sel[] = |
1855 | { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; |
1856 | int sel; |
1857 | |
1858 | if (dev->xfer_shift != ATA_SHIFT_PIO) |
1859 | sel = dma_dnxfer_sel[dev->spdn_cnt]; |
1860 | else |
1861 | sel = pio_dnxfer_sel[dev->spdn_cnt]; |
1862 | |
1863 | dev->spdn_cnt++; |
1864 | |
1865 | if (ata_down_xfermask_limit(dev, sel) == 0) { |
1866 | action |= ATA_EH_RESET; |
1867 | goto done; |
1868 | } |
1869 | } |
1870 | } |
1871 | |
1872 | /* Fall back to PIO? Slowing down to PIO is meaningless for |
1873 | * SATA ATA devices. Consider it only for PATA and SATAPI. |
1874 | */ |
1875 | if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && |
1876 | (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && |
1877 | (dev->xfer_shift != ATA_SHIFT_PIO)) { |
		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
1879 | dev->spdn_cnt = 0; |
1880 | action |= ATA_EH_RESET; |
1881 | goto done; |
1882 | } |
1883 | } |
1884 | |
1885 | return 0; |
1886 | done: |
1887 | /* device has been slowed down, blow error history */ |
1888 | if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) |
		ata_ering_clear(&dev->ering);
1890 | return action; |
1891 | } |
1892 | |
1893 | /** |
1894 | * ata_eh_worth_retry - analyze error and decide whether to retry |
1895 | * @qc: qc to possibly retry |
1896 | * |
1897 | * Look at the cause of the error and decide if a retry |
1898 | * might be useful or not. We don't want to retry media errors |
1899 | * because the drive itself has probably already taken 10-30 seconds |
1900 | * doing its own internal retries before reporting the failure. |
1901 | */ |
1902 | static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc) |
1903 | { |
1904 | if (qc->err_mask & AC_ERR_MEDIA) |
1905 | return 0; /* don't retry media errors */ |
1906 | if (qc->flags & ATA_QCFLAG_IO) |
1907 | return 1; /* otherwise retry anything from fs stack */ |
1908 | if (qc->err_mask & AC_ERR_INVALID) |
1909 | return 0; /* don't retry these */ |
1910 | return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */ |
1911 | } |
1912 | |
1913 | /** |
1914 | * ata_eh_quiet - check if we need to be quiet about a command error |
1915 | * @qc: qc to check |
1916 | * |
 *	Look at the qc flags and its SCSI command's request flags to determine
1918 | * if we need to be quiet about the command failure. |
1919 | */ |
1920 | static inline bool ata_eh_quiet(struct ata_queued_cmd *qc) |
1921 | { |
	if (qc->scsicmd && scsi_cmd_to_rq(qc->scsicmd)->rq_flags & RQF_QUIET)
1923 | qc->flags |= ATA_QCFLAG_QUIET; |
1924 | return qc->flags & ATA_QCFLAG_QUIET; |
1925 | } |
1926 | |
1927 | static int ata_eh_read_sense_success_non_ncq(struct ata_link *link) |
1928 | { |
1929 | struct ata_port *ap = link->ap; |
1930 | struct ata_queued_cmd *qc; |
1931 | |
	qc = __ata_qc_from_tag(ap, link->active_tag);
1933 | if (!qc) |
1934 | return -EIO; |
1935 | |
1936 | if (!(qc->flags & ATA_QCFLAG_EH) || |
1937 | !(qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) || |
1938 | qc->err_mask) |
1939 | return -EIO; |
1940 | |
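	/* ata_eh_request_sense() returns true only when valid sense data
	 * was actually obtained.
	 */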
1941 | if (!ata_eh_request_sense(qc)) |
1942 | return -EIO; |
1943 | |
1944 | /* |
1945 | * If we have sense data, call scsi_check_sense() in order to set the |
1946 | * correct SCSI ML byte (if any). No point in checking the return value, |
1947 | * since the command has already completed successfully. |
1948 | */ |
1949 | scsi_check_sense(qc->scsicmd); |
1950 | |
1951 | return 0; |
1952 | } |
1953 | |
1954 | static void ata_eh_get_success_sense(struct ata_link *link) |
1955 | { |
1956 | struct ata_eh_context *ehc = &link->eh_context; |
1957 | struct ata_device *dev = link->device; |
1958 | struct ata_port *ap = link->ap; |
1959 | struct ata_queued_cmd *qc; |
1960 | int tag, ret = 0; |
1961 | |
1962 | if (!(ehc->i.dev_action[dev->devno] & ATA_EH_GET_SUCCESS_SENSE)) |
1963 | return; |
1964 | |
1965 | /* if frozen, we can't do much */ |
1966 | if (ata_port_is_frozen(ap)) { |
		ata_dev_warn(dev,
			     "successful sense data available but port frozen\n");
1969 | goto out; |
1970 | } |
1971 | |
1972 | /* |
1973 | * If the link has sactive set, then we have outstanding NCQ commands |
1974 | * and have to read the Successful NCQ Commands log to get the sense |
1975 | * data. Otherwise, we are dealing with a non-NCQ command and use |
1976 | * request sense ext command to retrieve the sense data. |
1977 | */ |
1978 | if (link->sactive) |
1979 | ret = ata_eh_read_sense_success_ncq_log(link); |
1980 | else |
1981 | ret = ata_eh_read_sense_success_non_ncq(link); |
1982 | if (ret) |
1983 | goto out; |
1984 | |
	ata_eh_done(link, dev, ATA_EH_GET_SUCCESS_SENSE);
1986 | return; |
1987 | |
1988 | out: |
1989 | /* |
1990 | * If we failed to get sense data for a successful command that ought to |
1991 | * have sense data, we cannot simply return BLK_STS_OK to user space. |
1992 | * This is because we can't know if the sense data that we couldn't get |
1993 | * was actually "DATA CURRENTLY UNAVAILABLE". Reporting such a command |
1994 | * as success to user space would result in a silent data corruption. |
1995 | * Thus, add a bogus ABORTED_COMMAND sense data to such commands, such |
1996 | * that SCSI will report these commands as BLK_STS_IOERR to user space. |
1997 | */ |
1998 | ata_qc_for_each_raw(ap, qc, tag) { |
1999 | if (!(qc->flags & ATA_QCFLAG_EH) || |
2000 | !(qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) || |
2001 | qc->err_mask || |
		    ata_dev_phys_link(qc->dev) != link)
2003 | continue; |
2004 | |
2005 | /* We managed to get sense for this success command, skip. */ |
2006 | if (qc->flags & ATA_QCFLAG_SENSE_VALID) |
2007 | continue; |
2008 | |
2009 | /* This success command did not have any sense data, skip. */ |
2010 | if (!(qc->result_tf.status & ATA_SENSE)) |
2011 | continue; |
2012 | |
		/* This success command had sense data, but we failed to get it. */
		ata_scsi_set_sense(dev, qc->scsicmd, ABORTED_COMMAND, 0, 0);
2015 | qc->flags |= ATA_QCFLAG_SENSE_VALID; |
2016 | } |
	ata_eh_done(link, dev, ATA_EH_GET_SUCCESS_SENSE);
2018 | } |
2019 | |
2020 | /** |
2021 | * ata_eh_link_autopsy - analyze error and determine recovery action |
2022 | * @link: host link to perform autopsy on |
2023 | * |
2024 | * Analyze why @link failed and determine which recovery actions |
2025 | * are needed. This function also sets more detailed AC_ERR_* |
2026 | * values and fills sense data for ATAPI CHECK SENSE. |
2027 | * |
2028 | * LOCKING: |
2029 | * Kernel thread context (may sleep). |
2030 | */ |
2031 | static void ata_eh_link_autopsy(struct ata_link *link) |
2032 | { |
2033 | struct ata_port *ap = link->ap; |
2034 | struct ata_eh_context *ehc = &link->eh_context; |
2035 | struct ata_queued_cmd *qc; |
2036 | struct ata_device *dev; |
2037 | unsigned int all_err_mask = 0, eflags = 0; |
2038 | int tag, nr_failed = 0, nr_quiet = 0; |
2039 | u32 serror; |
2040 | int rc; |
2041 | |
2042 | if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) |
2043 | return; |
2044 | |
2045 | /* obtain and analyze SError */ |
	rc = sata_scr_read(link, SCR_ERROR, &serror);
2047 | if (rc == 0) { |
2048 | ehc->i.serror |= serror; |
2049 | ata_eh_analyze_serror(link); |
2050 | } else if (rc != -EOPNOTSUPP) { |
2051 | /* SError read failed, force reset and probing */ |
2052 | ehc->i.probe_mask |= ATA_ALL_DEVICES; |
2053 | ehc->i.action |= ATA_EH_RESET; |
2054 | ehc->i.err_mask |= AC_ERR_OTHER; |
2055 | } |
2056 | |
2057 | /* analyze NCQ failure */ |
2058 | ata_eh_analyze_ncq_error(link); |
2059 | |
2060 | /* |
2061 | * Check if this was a successful command that simply needs sense data. |
2062 | * Since the sense data is not part of the completion, we need to fetch |
2063 | * it using an additional command. Since this can't be done from irq |
2064 | * context, the sense data for successful commands are fetched by EH. |
2065 | */ |
2066 | ata_eh_get_success_sense(link); |
2067 | |
2068 | /* any real error trumps AC_ERR_OTHER */ |
2069 | if (ehc->i.err_mask & ~AC_ERR_OTHER) |
2070 | ehc->i.err_mask &= ~AC_ERR_OTHER; |
2071 | |
2072 | all_err_mask |= ehc->i.err_mask; |
2073 | |
2074 | ata_qc_for_each_raw(ap, qc, tag) { |
2075 | if (!(qc->flags & ATA_QCFLAG_EH) || |
2076 | qc->flags & ATA_QCFLAG_RETRY || |
2077 | qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD || |
		    ata_dev_phys_link(qc->dev) != link)
2079 | continue; |
2080 | |
2081 | /* inherit upper level err_mask */ |
2082 | qc->err_mask |= ehc->i.err_mask; |
2083 | |
2084 | /* analyze TF */ |
2085 | ehc->i.action |= ata_eh_analyze_tf(qc); |
2086 | |
2087 | /* DEV errors are probably spurious in case of ATA_BUS error */ |
2088 | if (qc->err_mask & AC_ERR_ATA_BUS) |
2089 | qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | |
2090 | AC_ERR_INVALID); |
2091 | |
2092 | /* any real error trumps unknown error */ |
2093 | if (qc->err_mask & ~AC_ERR_OTHER) |
2094 | qc->err_mask &= ~AC_ERR_OTHER; |
2095 | |
2096 | /* |
2097 | * SENSE_VALID trumps dev/unknown error and revalidation. Upper |
2098 | * layers will determine whether the command is worth retrying |
2099 | * based on the sense data and device class/type. Otherwise, |
2100 | * determine directly if the command is worth retrying using its |
2101 | * error mask and flags. |
2102 | */ |
2103 | if (qc->flags & ATA_QCFLAG_SENSE_VALID) |
2104 | qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); |
2105 | else if (ata_eh_worth_retry(qc)) |
2106 | qc->flags |= ATA_QCFLAG_RETRY; |
2107 | |
2108 | /* accumulate error info */ |
2109 | ehc->i.dev = qc->dev; |
2110 | all_err_mask |= qc->err_mask; |
2111 | if (qc->flags & ATA_QCFLAG_IO) |
2112 | eflags |= ATA_EFLAG_IS_IO; |
2113 | trace_ata_eh_link_autopsy_qc(qc); |
2114 | |
2115 | /* Count quiet errors */ |
2116 | if (ata_eh_quiet(qc)) |
2117 | nr_quiet++; |
2118 | nr_failed++; |
2119 | } |
2120 | |
2121 | /* If all failed commands requested silence, then be quiet */ |
2122 | if (nr_quiet == nr_failed) |
2123 | ehc->i.flags |= ATA_EHI_QUIET; |
2124 | |
2125 | /* enforce default EH actions */ |
2126 | if (ata_port_is_frozen(ap) || |
2127 | all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) |
2128 | ehc->i.action |= ATA_EH_RESET; |
2129 | else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || |
2130 | (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) |
2131 | ehc->i.action |= ATA_EH_REVALIDATE; |
2132 | |
2133 | /* If we have offending qcs and the associated failed device, |
2134 | * perform per-dev EH action only on the offending device. |
2135 | */ |
2136 | if (ehc->i.dev) { |
2137 | ehc->i.dev_action[ehc->i.dev->devno] |= |
2138 | ehc->i.action & ATA_EH_PERDEV_MASK; |
2139 | ehc->i.action &= ~ATA_EH_PERDEV_MASK; |
2140 | } |
2141 | |
2142 | /* propagate timeout to host link */ |
2143 | if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) |
2144 | ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; |
2145 | |
2146 | /* record error and consider speeding down */ |
2147 | dev = ehc->i.dev; |
2148 | if (!dev && ((ata_link_max_devices(link) == 1 && |
		      ata_dev_enabled(link->device))))
2150 | dev = link->device; |
2151 | |
2152 | if (dev) { |
2153 | if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) |
2154 | eflags |= ATA_EFLAG_DUBIOUS_XFER; |
		ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
		trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask);
2157 | } |
2158 | } |
2159 | |
2160 | /** |
2161 | * ata_eh_autopsy - analyze error and determine recovery action |
2162 | * @ap: host port to perform autopsy on |
2163 | * |
2164 | * Analyze all links of @ap and determine why they failed and |
2165 | * which recovery actions are needed. |
2166 | * |
2167 | * LOCKING: |
2168 | * Kernel thread context (may sleep). |
2169 | */ |
2170 | void ata_eh_autopsy(struct ata_port *ap) |
2171 | { |
2172 | struct ata_link *link; |
2173 | |
2174 | ata_for_each_link(link, ap, EDGE) |
2175 | ata_eh_link_autopsy(link); |
2176 | |
2177 | /* Handle the frigging slave link. Autopsy is done similarly |
2178 | * but actions and flags are transferred over to the master |
2179 | * link and handled from there. |
2180 | */ |
2181 | if (ap->slave_link) { |
2182 | struct ata_eh_context *mehc = &ap->link.eh_context; |
2183 | struct ata_eh_context *sehc = &ap->slave_link->eh_context; |
2184 | |
2185 | /* transfer control flags from master to slave */ |
2186 | sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; |
2187 | |
2188 | /* perform autopsy on the slave link */ |
		ata_eh_link_autopsy(ap->slave_link);
2190 | |
2191 | /* transfer actions from slave to master and clear slave */ |
		ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
2193 | mehc->i.action |= sehc->i.action; |
2194 | mehc->i.dev_action[1] |= sehc->i.dev_action[1]; |
2195 | mehc->i.flags |= sehc->i.flags; |
		ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
2197 | } |
2198 | |
2199 | /* Autopsy of fanout ports can affect host link autopsy. |
2200 | * Perform host link autopsy last. |
2201 | */ |
2202 | if (sata_pmp_attached(ap)) |
		ata_eh_link_autopsy(&ap->link);
2204 | } |
2205 | |
2206 | /** |
2207 | * ata_get_cmd_name - get name for ATA command |
2208 | * @command: ATA command code to get name for |
2209 | * |
2210 | * Return a textual name of the given command or "unknown" |
2211 | * |
2212 | * LOCKING: |
2213 | * None |
2214 | */ |
2215 | const char *ata_get_cmd_name(u8 command) |
2216 | { |
2217 | #ifdef CONFIG_ATA_VERBOSE_ERROR |
2218 | static const struct |
2219 | { |
2220 | u8 command; |
2221 | const char *text; |
2222 | } cmd_descr[] = { |
2223 | { ATA_CMD_DEV_RESET, "DEVICE RESET" }, |
2224 | { ATA_CMD_CHK_POWER, "CHECK POWER MODE" }, |
2225 | { ATA_CMD_STANDBY, "STANDBY" }, |
2226 | { ATA_CMD_IDLE, "IDLE" }, |
2227 | { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" }, |
2228 | { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" }, |
2229 | { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" }, |
2230 | { ATA_CMD_NOP, "NOP" }, |
2231 | { ATA_CMD_FLUSH, "FLUSH CACHE" }, |
2232 | { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" }, |
2233 | { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" }, |
2234 | { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" }, |
2235 | { ATA_CMD_SERVICE, "SERVICE" }, |
2236 | { ATA_CMD_READ, "READ DMA" }, |
2237 | { ATA_CMD_READ_EXT, "READ DMA EXT" }, |
2238 | { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" }, |
2239 | { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" }, |
2240 | { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" }, |
2241 | { ATA_CMD_WRITE, "WRITE DMA" }, |
2242 | { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" }, |
2243 | { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" }, |
2244 | { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" }, |
2245 | { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" }, |
2246 | { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" }, |
2247 | { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, |
2248 | { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, |
2249 | { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, |
2250 | { ATA_CMD_NCQ_NON_DATA, "NCQ NON-DATA" }, |
2251 | { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" }, |
2252 | { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" }, |
2253 | { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, |
2254 | { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" }, |
2255 | { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" }, |
2256 | { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" }, |
2257 | { ATA_CMD_READ_MULTI, "READ MULTIPLE" }, |
2258 | { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" }, |
2259 | { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" }, |
2260 | { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" }, |
2261 | { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" }, |
2262 | { ATA_CMD_SET_FEATURES, "SET FEATURES" }, |
2263 | { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" }, |
2264 | { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" }, |
2265 | { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" }, |
2266 | { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" }, |
2267 | { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" }, |
2268 | { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" }, |
2269 | { ATA_CMD_SLEEP, "SLEEP" }, |
2270 | { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" }, |
2271 | { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" }, |
2272 | { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" }, |
2273 | { ATA_CMD_SET_MAX, "SET MAX ADDRESS" }, |
2274 | { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" }, |
2275 | { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" }, |
2276 | { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" }, |
2277 | { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" }, |
2278 | { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" }, |
2279 | { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" }, |
2280 | { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" }, |
2281 | { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" }, |
2282 | { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" }, |
2283 | { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" }, |
2284 | { ATA_CMD_PMP_READ, "READ BUFFER" }, |
2285 | { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" }, |
2286 | { ATA_CMD_PMP_WRITE, "WRITE BUFFER" }, |
2287 | { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" }, |
2288 | { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" }, |
2289 | { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" }, |
2290 | { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" }, |
2291 | { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" }, |
2292 | { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" }, |
2293 | { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" }, |
2294 | { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" }, |
2295 | { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" }, |
2296 | { ATA_CMD_SMART, "SMART" }, |
2297 | { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" }, |
2298 | { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" }, |
2299 | { ATA_CMD_DSM, "DATA SET MANAGEMENT" }, |
2300 | { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" }, |
2301 | { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" }, |
2302 | { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" }, |
2303 | { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, |
2304 | { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, |
2305 | { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, |
2306 | { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" }, |
2307 | { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" }, |
2308 | { ATA_CMD_ZAC_MGMT_IN, "ZAC MANAGEMENT IN" }, |
2309 | { ATA_CMD_ZAC_MGMT_OUT, "ZAC MANAGEMENT OUT" }, |
2310 | { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, |
2311 | { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, |
2312 | { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, |
2313 | { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" }, |
2314 | { ATA_CMD_RESTORE, "RECALIBRATE" }, |
2315 | { 0, NULL } /* terminate list */ |
2316 | }; |
2317 | |
2318 | unsigned int i; |
2319 | for (i = 0; cmd_descr[i].text; i++) |
2320 | if (cmd_descr[i].command == command) |
2321 | return cmd_descr[i].text; |
2322 | #endif |
2323 | |
2324 | return "unknown" ; |
2325 | } |
2326 | EXPORT_SYMBOL_GPL(ata_get_cmd_name); |
2327 | |
2328 | /** |
2329 | * ata_eh_link_report - report error handling to user |
2330 | * @link: ATA link EH is going on |
2331 | * |
2332 | * Report EH to user. |
2333 | * |
2334 | * LOCKING: |
2335 | * None. |
2336 | */ |
2337 | static void ata_eh_link_report(struct ata_link *link) |
2338 | { |
2339 | struct ata_port *ap = link->ap; |
2340 | struct ata_eh_context *ehc = &link->eh_context; |
2341 | struct ata_queued_cmd *qc; |
2342 | const char *frozen, *desc; |
	char tries_buf[16] = "";
2344 | int tag, nr_failed = 0; |
2345 | |
2346 | if (ehc->i.flags & ATA_EHI_QUIET) |
2347 | return; |
2348 | |
2349 | desc = NULL; |
2350 | if (ehc->i.desc[0] != '\0') |
2351 | desc = ehc->i.desc; |
2352 | |
2353 | ata_qc_for_each_raw(ap, qc, tag) { |
2354 | if (!(qc->flags & ATA_QCFLAG_EH) || |
		    ata_dev_phys_link(qc->dev) != link ||
2356 | ((qc->flags & ATA_QCFLAG_QUIET) && |
2357 | qc->err_mask == AC_ERR_DEV)) |
2358 | continue; |
2359 | if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) |
2360 | continue; |
2361 | |
2362 | nr_failed++; |
2363 | } |
2364 | |
2365 | if (!nr_failed && !ehc->i.err_mask) |
2366 | return; |
2367 | |
	frozen = "";
	if (ata_port_is_frozen(ap))
		frozen = " frozen";
2371 | |
2372 | if (ap->eh_tries < ATA_EH_MAX_TRIES) |
		snprintf(tries_buf, sizeof(tries_buf), " t%d",
2374 | ap->eh_tries); |
2375 | |
2376 | if (ehc->i.dev) { |
2377 | ata_dev_err(ehc->i.dev, "exception Emask 0x%x " |
2378 | "SAct 0x%x SErr 0x%x action 0x%x%s%s\n" , |
2379 | ehc->i.err_mask, link->sactive, ehc->i.serror, |
2380 | ehc->i.action, frozen, tries_buf); |
2381 | if (desc) |
			ata_dev_err(ehc->i.dev, "%s\n", desc);
2383 | } else { |
2384 | ata_link_err(link, "exception Emask 0x%x " |
2385 | "SAct 0x%x SErr 0x%x action 0x%x%s%s\n" , |
2386 | ehc->i.err_mask, link->sactive, ehc->i.serror, |
2387 | ehc->i.action, frozen, tries_buf); |
2388 | if (desc) |
			ata_link_err(link, "%s\n", desc);
2390 | } |
2391 | |
2392 | #ifdef CONFIG_ATA_VERBOSE_ERROR |
2393 | if (ehc->i.serror) |
		ata_link_err(link,
			     "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n",
			     ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "",
			     ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "",
			     ehc->i.serror & SERR_DATA ? "UnrecovData " : "",
			     ehc->i.serror & SERR_PERSISTENT ? "Persist " : "",
			     ehc->i.serror & SERR_PROTOCOL ? "Proto " : "",
			     ehc->i.serror & SERR_INTERNAL ? "HostInt " : "",
			     ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "",
			     ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "",
			     ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "",
			     ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "",
			     ehc->i.serror & SERR_DISPARITY ? "Dispar " : "",
			     ehc->i.serror & SERR_CRC ? "BadCRC " : "",
			     ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "",
			     ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "",
			     ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "",
			     ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "",
			     ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : "");
2413 | #endif |
2414 | |
2415 | ata_qc_for_each_raw(ap, qc, tag) { |
2416 | struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; |
		char data_buf[20] = "";
		char cdb_buf[70] = "";
2419 | |
2420 | if (!(qc->flags & ATA_QCFLAG_EH) || |
		    ata_dev_phys_link(qc->dev) != link || !qc->err_mask)
2422 | continue; |
2423 | |
2424 | if (qc->dma_dir != DMA_NONE) { |
2425 | static const char *dma_str[] = { |
				[DMA_BIDIRECTIONAL] = "bidi",
				[DMA_TO_DEVICE] = "out",
				[DMA_FROM_DEVICE] = "in",
2429 | }; |
2430 | const char *prot_str = NULL; |
2431 | |
2432 | switch (qc->tf.protocol) { |
			case ATA_PROT_UNKNOWN:
				prot_str = "unknown";
				break;
			case ATA_PROT_NODATA:
				prot_str = "nodata";
				break;
			case ATA_PROT_PIO:
				prot_str = "pio";
				break;
			case ATA_PROT_DMA:
				prot_str = "dma";
				break;
			case ATA_PROT_NCQ:
				prot_str = "ncq dma";
				break;
			case ATA_PROT_NCQ_NODATA:
				prot_str = "ncq nodata";
				break;
			case ATAPI_PROT_NODATA:
				prot_str = "nodata";
				break;
			case ATAPI_PROT_PIO:
				prot_str = "pio";
				break;
			case ATAPI_PROT_DMA:
				prot_str = "dma";
				break;
2460 | } |
			snprintf(data_buf, sizeof(data_buf), " %s %u %s",
				 prot_str, qc->nbytes, dma_str[qc->dma_dir]);
2463 | } |
2464 | |
		if (ata_is_atapi(qc->tf.protocol)) {
2466 | const u8 *cdb = qc->cdb; |
2467 | size_t cdb_len = qc->dev->cdb_len; |
2468 | |
2469 | if (qc->scsicmd) { |
2470 | cdb = qc->scsicmd->cmnd; |
2471 | cdb_len = qc->scsicmd->cmd_len; |
2472 | } |
2473 | __scsi_format_command(cdb_buf, sizeof(cdb_buf), |
2474 | cdb, cdb_len); |
2475 | } else |
2476 | ata_dev_err(qc->dev, "failed command: %s\n" , |
2477 | ata_get_cmd_name(cmd->command)); |
2478 | |
		ata_dev_err(qc->dev,
			"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"tag %d%s\n         %s"
2482 | "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " |
2483 | "Emask 0x%x (%s)%s\n" , |
2484 | cmd->command, cmd->feature, cmd->nsect, |
2485 | cmd->lbal, cmd->lbam, cmd->lbah, |
2486 | cmd->hob_feature, cmd->hob_nsect, |
2487 | cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, |
2488 | cmd->device, qc->tag, data_buf, cdb_buf, |
2489 | res->status, res->error, res->nsect, |
2490 | res->lbal, res->lbam, res->lbah, |
2491 | res->hob_feature, res->hob_nsect, |
2492 | res->hob_lbal, res->hob_lbam, res->hob_lbah, |
2493 | res->device, qc->err_mask, ata_err_string(qc->err_mask), |
2494 | qc->err_mask & AC_ERR_NCQ ? " <F>" : "" ); |
2495 | |
2496 | #ifdef CONFIG_ATA_VERBOSE_ERROR |
2497 | if (res->status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | |
2498 | ATA_SENSE | ATA_ERR)) { |
2499 | if (res->status & ATA_BUSY) |
2500 | ata_dev_err(qc->dev, "status: { Busy }\n" ); |
2501 | else |
2502 | ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n" , |
2503 | res->status & ATA_DRDY ? "DRDY " : "" , |
2504 | res->status & ATA_DF ? "DF " : "" , |
2505 | res->status & ATA_DRQ ? "DRQ " : "" , |
2506 | res->status & ATA_SENSE ? "SENSE " : "" , |
2507 | res->status & ATA_ERR ? "ERR " : "" ); |
2508 | } |
2509 | |
2510 | if (cmd->command != ATA_CMD_PACKET && |
2511 | (res->error & (ATA_ICRC | ATA_UNC | ATA_AMNF | ATA_IDNF | |
2512 | ATA_ABORTED))) |
2513 | ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n" , |
2514 | res->error & ATA_ICRC ? "ICRC " : "" , |
2515 | res->error & ATA_UNC ? "UNC " : "" , |
2516 | res->error & ATA_AMNF ? "AMNF " : "" , |
2517 | res->error & ATA_IDNF ? "IDNF " : "" , |
2518 | res->error & ATA_ABORTED ? "ABRT " : "" ); |
2519 | #endif |
2520 | } |
2521 | } |
2522 | |
2523 | /** |
2524 | * ata_eh_report - report error handling to user |
2525 | * @ap: ATA port to report EH about |
2526 | * |
2527 | * Report EH to user. |
2528 | * |
2529 | * LOCKING: |
2530 | * None. |
2531 | */ |
2532 | void ata_eh_report(struct ata_port *ap) |
2533 | { |
2534 | struct ata_link *link; |
2535 | |
2536 | ata_for_each_link(link, ap, HOST_FIRST) |
2537 | ata_eh_link_report(link); |
2538 | } |
2539 | |
2540 | static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, |
2541 | unsigned int *classes, unsigned long deadline, |
2542 | bool clear_classes) |
2543 | { |
2544 | struct ata_device *dev; |
2545 | |
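	/* Start from a clean slate so a failed or partial attempt can't
	 * leave stale classifications behind.
	 */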
2546 | if (clear_classes) |
2547 | ata_for_each_dev(dev, link, ALL) |
2548 | classes[dev->devno] = ATA_DEV_UNKNOWN; |
2549 | |
2550 | return reset(link, classes, deadline); |
2551 | } |
2552 | |
2553 | static int ata_eh_followup_srst_needed(struct ata_link *link, int rc) |
2554 | { |
2555 | if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) |
2556 | return 0; |
2557 | if (rc == -EAGAIN) |
2558 | return 1; |
	if (sata_pmp_supported(link->ap) && ata_is_host_link(link))
2560 | return 1; |
2561 | return 0; |
2562 | } |
2563 | |
2564 | int ata_eh_reset(struct ata_link *link, int classify, |
2565 | ata_prereset_fn_t prereset, ata_reset_fn_t softreset, |
2566 | ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) |
2567 | { |
2568 | struct ata_port *ap = link->ap; |
2569 | struct ata_link *slave = ap->slave_link; |
2570 | struct ata_eh_context *ehc = &link->eh_context; |
2571 | struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL; |
2572 | unsigned int *classes = ehc->classes; |
2573 | unsigned int lflags = link->flags; |
2574 | int verbose = !(ehc->i.flags & ATA_EHI_QUIET); |
2575 | int max_tries = 0, try = 0; |
2576 | struct ata_link *failed_link; |
2577 | struct ata_device *dev; |
2578 | unsigned long deadline, now; |
2579 | ata_reset_fn_t reset; |
2580 | unsigned long flags; |
2581 | u32 sstatus; |
2582 | int nr_unknown, rc; |
2583 | |
2584 | /* |
2585 | * Prepare to reset |
2586 | */ |
2587 | while (ata_eh_reset_timeouts[max_tries] != UINT_MAX) |
2588 | max_tries++; |
2589 | if (link->flags & ATA_LFLAG_RST_ONCE) |
2590 | max_tries = 1; |
2591 | if (link->flags & ATA_LFLAG_NO_HRST) |
2592 | hardreset = NULL; |
2593 | if (link->flags & ATA_LFLAG_NO_SRST) |
2594 | softreset = NULL; |
2595 | |
2596 | /* make sure each reset attempt is at least COOL_DOWN apart */ |
2597 | if (ehc->i.flags & ATA_EHI_DID_RESET) { |
2598 | now = jiffies; |
2599 | WARN_ON(time_after(ehc->last_reset, now)); |
		deadline = ata_deadline(ehc->last_reset,
					ATA_EH_RESET_COOL_DOWN);
2602 | if (time_before(now, deadline)) |
			schedule_timeout_uninterruptible(deadline - now);
2604 | } |
2605 | |
2606 | spin_lock_irqsave(ap->lock, flags); |
2607 | ap->pflags |= ATA_PFLAG_RESETTING; |
	spin_unlock_irqrestore(ap->lock, flags);
2609 | |
	ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
2611 | |
2612 | ata_for_each_dev(dev, link, ALL) { |
2613 | /* If we issue an SRST then an ATA drive (not ATAPI) |
2614 | * may change configuration and be in PIO0 timing. If |
2615 | * we do a hard reset (or are coming from power on) |
2616 | * this is true for ATA or ATAPI. Until we've set a |
2617 | * suitable controller mode we should not touch the |
2618 | * bus as we may be talking too fast. |
2619 | */ |
2620 | dev->pio_mode = XFER_PIO_0; |
2621 | dev->dma_mode = 0xff; |
2622 | |
2623 | /* If the controller has a pio mode setup function |
2624 | * then use it to set the chipset to rights. Don't |
2625 | * touch the DMA setup as that will be dealt with when |
2626 | * configuring devices. |
2627 | */ |
2628 | if (ap->ops->set_piomode) |
2629 | ap->ops->set_piomode(ap, dev); |
2630 | } |
2631 | |
2632 | /* prefer hardreset */ |
2633 | reset = NULL; |
2634 | ehc->i.action &= ~ATA_EH_RESET; |
2635 | if (hardreset) { |
2636 | reset = hardreset; |
2637 | ehc->i.action |= ATA_EH_HARDRESET; |
2638 | } else if (softreset) { |
2639 | reset = softreset; |
2640 | ehc->i.action |= ATA_EH_SOFTRESET; |
2641 | } |
2642 | |
2643 | if (prereset) { |
		unsigned long deadline = ata_deadline(jiffies,
						      ATA_EH_PRERESET_TIMEOUT);
2646 | |
2647 | if (slave) { |
2648 | sehc->i.action &= ~ATA_EH_RESET; |
2649 | sehc->i.action |= ehc->i.action; |
2650 | } |
2651 | |
2652 | rc = prereset(link, deadline); |
2653 | |
2654 | /* If present, do prereset on slave link too. Reset |
2655 | * is skipped iff both master and slave links report |
2656 | * -ENOENT or clear ATA_EH_RESET. |
2657 | */ |
2658 | if (slave && (rc == 0 || rc == -ENOENT)) { |
2659 | int tmp; |
2660 | |
2661 | tmp = prereset(slave, deadline); |
2662 | if (tmp != -ENOENT) |
2663 | rc = tmp; |
2664 | |
2665 | ehc->i.action |= sehc->i.action; |
2666 | } |
2667 | |
2668 | if (rc) { |
2669 | if (rc == -ENOENT) { |
2670 | ata_link_dbg(link, "port disabled--ignoring\n" ); |
2671 | ehc->i.action &= ~ATA_EH_RESET; |
2672 | |
2673 | ata_for_each_dev(dev, link, ALL) |
2674 | classes[dev->devno] = ATA_DEV_NONE; |
2675 | |
2676 | rc = 0; |
2677 | } else |
				ata_link_err(link,
					     "prereset failed (errno=%d)\n",
					     rc);
2681 | goto out; |
2682 | } |
2683 | |
2684 | /* prereset() might have cleared ATA_EH_RESET. If so, |
2685 | * bang classes, thaw and return. |
2686 | */ |
2687 | if (reset && !(ehc->i.action & ATA_EH_RESET)) { |
2688 | ata_for_each_dev(dev, link, ALL) |
2689 | classes[dev->devno] = ATA_DEV_NONE; |
2690 | if (ata_port_is_frozen(ap) && ata_is_host_link(link)) |
2691 | ata_eh_thaw_port(ap); |
2692 | rc = 0; |
2693 | goto out; |
2694 | } |
2695 | } |
2696 | |
2697 | retry: |
2698 | /* |
2699 | * Perform reset |
2700 | */ |
2701 | if (ata_is_host_link(link)) |
2702 | ata_eh_freeze_port(ap); |
2703 | |
	deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);
2705 | |
2706 | if (reset) { |
2707 | if (verbose) |
2708 | ata_link_info(link, "%s resetting link\n" , |
2709 | reset == softreset ? "soft" : "hard" ); |
2710 | |
2711 | /* mark that this EH session started with reset */ |
2712 | ehc->last_reset = jiffies; |
2713 | if (reset == hardreset) { |
2714 | ehc->i.flags |= ATA_EHI_DID_HARDRESET; |
			trace_ata_link_hardreset_begin(link, classes, deadline);
2716 | } else { |
2717 | ehc->i.flags |= ATA_EHI_DID_SOFTRESET; |
			trace_ata_link_softreset_begin(link, classes, deadline);
2719 | } |
2720 | |
		rc = ata_do_reset(link, reset, classes, deadline, true);
		if (reset == hardreset)
			trace_ata_link_hardreset_end(link, classes, rc);
		else
			trace_ata_link_softreset_end(link, classes, rc);
2726 | if (rc && rc != -EAGAIN) { |
2727 | failed_link = link; |
2728 | goto fail; |
2729 | } |
2730 | |
2731 | /* hardreset slave link if existent */ |
2732 | if (slave && reset == hardreset) { |
2733 | int tmp; |
2734 | |
2735 | if (verbose) |
2736 | ata_link_info(slave, "hard resetting link\n" ); |
2737 | |
			ata_eh_about_to_do(slave, NULL, ATA_EH_RESET);
			trace_ata_slave_hardreset_begin(slave, classes,
							deadline);
			tmp = ata_do_reset(slave, reset, classes, deadline,
					   false);
			trace_ata_slave_hardreset_end(slave, classes, tmp);
2744 | switch (tmp) { |
2745 | case -EAGAIN: |
2746 | rc = -EAGAIN; |
2747 | break; |
2748 | case 0: |
2749 | break; |
2750 | default: |
2751 | failed_link = slave; |
2752 | rc = tmp; |
2753 | goto fail; |
2754 | } |
2755 | } |
2756 | |
2757 | /* perform follow-up SRST if necessary */ |
2758 | if (reset == hardreset && |
2759 | ata_eh_followup_srst_needed(link, rc)) { |
2760 | reset = softreset; |
2761 | |
2762 | if (!reset) { |
				ata_link_err(link,
					     "follow-up softreset required but no softreset available\n");
2765 | failed_link = link; |
2766 | rc = -EINVAL; |
2767 | goto fail; |
2768 | } |
2769 | |
			ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
			trace_ata_link_softreset_begin(link, classes, deadline);
			rc = ata_do_reset(link, reset, classes, deadline, true);
			trace_ata_link_softreset_end(link, classes, rc);
2774 | if (rc) { |
2775 | failed_link = link; |
2776 | goto fail; |
2777 | } |
2778 | } |
2779 | } else { |
2780 | if (verbose) |
			ata_link_info(link,
				      "no reset method available, skipping reset\n");
2783 | if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) |
2784 | lflags |= ATA_LFLAG_ASSUME_ATA; |
2785 | } |
2786 | |
2787 | /* |
2788 | * Post-reset processing |
2789 | */ |
2790 | ata_for_each_dev(dev, link, ALL) { |
2791 | /* After the reset, the device state is PIO 0 and the |
2792 | * controller state is undefined. Reset also wakes up |
2793 | * drives from sleeping mode. |
2794 | */ |
2795 | dev->pio_mode = XFER_PIO_0; |
2796 | dev->flags &= ~ATA_DFLAG_SLEEPING; |
2797 | |
		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
2799 | continue; |
2800 | |
2801 | /* apply class override */ |
2802 | if (lflags & ATA_LFLAG_ASSUME_ATA) |
2803 | classes[dev->devno] = ATA_DEV_ATA; |
2804 | else if (lflags & ATA_LFLAG_ASSUME_SEMB) |
2805 | classes[dev->devno] = ATA_DEV_SEMB_UNSUP; |
2806 | } |
2807 | |
2808 | /* record current link speed */ |
	if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
		link->sata_spd = (sstatus >> 4) & 0xf;
	if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0)
2812 | slave->sata_spd = (sstatus >> 4) & 0xf; |
2813 | |
2814 | /* thaw the port */ |
2815 | if (ata_is_host_link(link)) |
2816 | ata_eh_thaw_port(ap); |
2817 | |
2818 | /* postreset() should clear hardware SError. Although SError |
2819 | * is cleared during link resume, clearing SError here is |
2820 | * necessary as some PHYs raise hotplug events after SRST. |
	 * This introduces a race condition where hotplug may occur
	 * between reset and here; the race is mitigated by cross-checking
	 * link onlineness and the classification result later.
2824 | */ |
2825 | if (postreset) { |
2826 | postreset(link, classes); |
		trace_ata_link_postreset(link, classes, rc);
2828 | if (slave) { |
2829 | postreset(slave, classes); |
			trace_ata_slave_postreset(slave, classes, rc);
2831 | } |
2832 | } |
2833 | |
2834 | /* clear cached SError */ |
2835 | spin_lock_irqsave(link->ap->lock, flags); |
2836 | link->eh_info.serror = 0; |
2837 | if (slave) |
2838 | slave->eh_info.serror = 0; |
	spin_unlock_irqrestore(link->ap->lock, flags);
2840 | |
2841 | /* |
2842 | * Make sure onlineness and classification result correspond. |
2843 | * Hotplug could have happened during reset and some |
2844 | * controllers fail to wait while a drive is spinning up after |
2845 | * being hotplugged causing misdetection. By cross checking |
2846 | * link on/offlineness and classification result, those |
2847 | * conditions can be reliably detected and retried. |
2848 | */ |
2849 | nr_unknown = 0; |
2850 | ata_for_each_dev(dev, link, ALL) { |
		if (ata_phys_link_online(ata_dev_phys_link(dev))) {
			if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
				ata_dev_dbg(dev, "link online but device misclassified\n");
				classes[dev->devno] = ATA_DEV_NONE;
				nr_unknown++;
			}
		} else if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
			if (ata_class_enabled(classes[dev->devno]))
				ata_dev_dbg(dev,
					    "link offline, clearing class %d to NONE\n",
					    classes[dev->devno]);
			classes[dev->devno] = ATA_DEV_NONE;
		} else if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
			ata_dev_dbg(dev,
				    "link status unknown, clearing UNKNOWN to NONE\n");
			classes[dev->devno] = ATA_DEV_NONE;
2867 | } |
2868 | } |
2869 | |
2870 | if (classify && nr_unknown) { |
2871 | if (try < max_tries) { |
			ata_link_warn(link,
				      "link online but %d devices misclassified, retrying\n",
				      nr_unknown);
2875 | failed_link = link; |
2876 | rc = -EAGAIN; |
2877 | goto fail; |
2878 | } |
		ata_link_warn(link,
			      "link online but %d devices misclassified, "
			      "device detection might fail\n", nr_unknown);
2882 | } |
2883 | |
2884 | /* reset successful, schedule revalidation */ |
	ata_eh_done(link, NULL, ATA_EH_RESET);
	if (slave)
		ata_eh_done(slave, NULL, ATA_EH_RESET);
2888 | ehc->last_reset = jiffies; /* update to completion time */ |
2889 | ehc->i.action |= ATA_EH_REVALIDATE; |
2890 | link->lpm_policy = ATA_LPM_UNKNOWN; /* reset LPM state */ |
2891 | |
2892 | rc = 0; |
2893 | out: |
2894 | /* clear hotplug flag */ |
2895 | ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; |
2896 | if (slave) |
2897 | sehc->i.flags &= ~ATA_EHI_HOTPLUGGED; |
2898 | |
2899 | spin_lock_irqsave(ap->lock, flags); |
2900 | ap->pflags &= ~ATA_PFLAG_RESETTING; |
	spin_unlock_irqrestore(ap->lock, flags);
2902 | |
2903 | return rc; |
2904 | |
2905 | fail: |
2906 | /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ |
2907 | if (!ata_is_host_link(link) && |
	    sata_scr_read(link, SCR_STATUS, &sstatus))
2909 | rc = -ERESTART; |
2910 | |
2911 | if (try >= max_tries) { |
2912 | /* |
2913 | * Thaw host port even if reset failed, so that the port |
2914 | * can be retried on the next phy event. This risks |
2915 | * repeated EH runs but seems to be a better tradeoff than |
2916 | * shutting down a port after a botched hotplug attempt. |
2917 | */ |
2918 | if (ata_is_host_link(link)) |
2919 | ata_eh_thaw_port(ap); |
2920 | ata_link_warn(link, "%s failed\n" , |
2921 | reset == hardreset ? "hardreset" : "softreset" ); |
2922 | goto out; |
2923 | } |
2924 | |
2925 | now = jiffies; |
2926 | if (time_before(now, deadline)) { |
2927 | unsigned long delta = deadline - now; |
2928 | |
		ata_link_warn(failed_link,
			      "reset failed (errno=%d), retrying in %u secs\n",
			      rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));
2932 | |
2933 | ata_eh_release(ap); |
2934 | while (delta) |
			delta = schedule_timeout_uninterruptible(delta);
2936 | ata_eh_acquire(ap); |
2937 | } |
2938 | |
2939 | /* |
	 * While disks spin up behind a PMP, some controllers fail to send
	 * SRST. They need to be reset, as well as the PMP, before retrying.
2942 | */ |
2943 | if (rc == -ERESTART) { |
2944 | if (ata_is_host_link(link)) |
2945 | ata_eh_thaw_port(ap); |
2946 | goto out; |
2947 | } |
2948 | |
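	/* Before the final attempt, lower the SATA link speed on both
	 * links; on -EPIPE (reset reported a PHY/debounce problem) lower
	 * the failed link's speed right away.
	 */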
	if (try == max_tries - 1) {
		sata_down_spd_limit(link, 0);
		if (slave)
			sata_down_spd_limit(slave, 0);
	} else if (rc == -EPIPE)
		sata_down_spd_limit(failed_link, 0);
2955 | |
2956 | if (hardreset) |
2957 | reset = hardreset; |
2958 | goto retry; |
2959 | } |
2960 | |
2961 | static inline void ata_eh_pull_park_action(struct ata_port *ap) |
2962 | { |
2963 | struct ata_link *link; |
2964 | struct ata_device *dev; |
2965 | unsigned long flags; |
2966 | |
2967 | /* |
2968 | * This function can be thought of as an extended version of |
2969 | * ata_eh_about_to_do() specially crafted to accommodate the |
2970 | * requirements of ATA_EH_PARK handling. Since the EH thread |
2971 | * does not leave the do {} while () loop in ata_eh_recover as |
2972 | * long as the timeout for a park request to *one* device on |
2973 | * the port has not expired, and since we still want to pick |
2974 | * up park requests to other devices on the same port or |
2975 | * timeout updates for the same device, we have to pull |
2976 | * ATA_EH_PARK actions from eh_info into eh_context.i |
2977 | * ourselves at the beginning of each pass over the loop. |
2978 | * |
2979 | * Additionally, all write accesses to &ap->park_req_pending |
2980 | * through reinit_completion() (see below) or complete_all() |
2981 | * (see ata_scsi_park_store()) are protected by the host lock. |
2982 | * As a result we have that park_req_pending.done is zero on |
2983 | * exit from this function, i.e. when ATA_EH_PARK actions for |
2984 | * *all* devices on port ap have been pulled into the |
2985 | * respective eh_context structs. If, and only if, |
2986 | * park_req_pending.done is non-zero by the time we reach |
2987 | * wait_for_completion_timeout(), another ATA_EH_PARK action |
2988 | * has been scheduled for at least one of the devices on port |
2989 | * ap and we have to cycle over the do {} while () loop in |
2990 | * ata_eh_recover() again. |
2991 | */ |
2992 | |
2993 | spin_lock_irqsave(ap->lock, flags); |
	reinit_completion(&ap->park_req_pending);
2995 | ata_for_each_link(link, ap, EDGE) { |
2996 | ata_for_each_dev(dev, link, ALL) { |
2997 | struct ata_eh_info *ehi = &link->eh_info; |
2998 | |
2999 | link->eh_context.i.dev_action[dev->devno] |= |
3000 | ehi->dev_action[dev->devno] & ATA_EH_PARK; |
			ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK);
3002 | } |
3003 | } |
	spin_unlock_irqrestore(ap->lock, flags);
3005 | } |
3006 | |
3007 | static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) |
3008 | { |
3009 | struct ata_eh_context *ehc = &dev->link->eh_context; |
3010 | struct ata_taskfile tf; |
3011 | unsigned int err_mask; |
3012 | |
	ata_tf_init(dev, &tf);
3014 | if (park) { |
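		/*
		 * IDLE IMMEDIATE with UNLOAD FEATURE per ATA8-ACS: feature
		 * 0x44 requests a head unload and the LBA fields carry the
		 * required 0x554e4c signature. On success the device
		 * returns 0xc4 in the LBA low register, checked below.
		 */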
3015 | ehc->unloaded_mask |= 1 << dev->devno; |
3016 | tf.command = ATA_CMD_IDLEIMMEDIATE; |
3017 | tf.feature = 0x44; |
3018 | tf.lbal = 0x4c; |
3019 | tf.lbam = 0x4e; |
3020 | tf.lbah = 0x55; |
3021 | } else { |
3022 | ehc->unloaded_mask &= ~(1 << dev->devno); |
3023 | tf.command = ATA_CMD_CHK_POWER; |
3024 | } |
3025 | |
3026 | tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; |
3027 | tf.protocol = ATA_PROT_NODATA; |
	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
3029 | if (park && (err_mask || tf.lbal != 0xc4)) { |
3030 | ata_dev_err(dev, "head unload failed!\n" ); |
3031 | ehc->unloaded_mask &= ~(1 << dev->devno); |
3032 | } |
3033 | } |
3034 | |
3035 | static int ata_eh_revalidate_and_attach(struct ata_link *link, |
3036 | struct ata_device **r_failed_dev) |
3037 | { |
3038 | struct ata_port *ap = link->ap; |
3039 | struct ata_eh_context *ehc = &link->eh_context; |
3040 | struct ata_device *dev; |
3041 | unsigned int new_mask = 0; |
3042 | unsigned long flags; |
3043 | int rc = 0; |
3044 | |
3045 | /* For PATA drive side cable detection to work, IDENTIFY must |
3046 | * be done backwards such that PDIAG- is released by the slave |
3047 | * device before the master device is identified. |
3048 | */ |
3049 | ata_for_each_dev(dev, link, ALL_REVERSE) { |
3050 | unsigned int action = ata_eh_dev_action(dev); |
3051 | unsigned int readid_flags = 0; |
3052 | |
3053 | if (ehc->i.flags & ATA_EHI_DID_RESET) |
3054 | readid_flags |= ATA_READID_POSTRESET; |
3055 | |
3056 | if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { |
3057 | WARN_ON(dev->class == ATA_DEV_PMP); |
3058 | |
3059 | /* |
3060 | * The link may be in a deep sleep, wake it up. |
3061 | * |
3062 | * If the link is in deep sleep, ata_phys_link_offline() |
3063 | * will return true, causing the revalidation to fail, |
3064 | * which leads to a (potentially) needless hard reset. |
3065 | * |
3066 | * ata_eh_recover() will later restore the link policy |
3067 | * to ap->target_lpm_policy after revalidation is done. |
3068 | */ |
3069 | if (link->lpm_policy > ATA_LPM_MAX_POWER) { |
				rc = ata_eh_set_lpm(link, ATA_LPM_MAX_POWER,
						    r_failed_dev);
3072 | if (rc) |
3073 | goto err; |
3074 | } |
3075 | |
			if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
3077 | rc = -EIO; |
3078 | goto err; |
3079 | } |
3080 | |
			ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
			rc = ata_dev_revalidate(dev, ehc->classes[dev->devno],
3083 | readid_flags); |
3084 | if (rc) |
3085 | goto err; |
3086 | |
			ata_eh_done(link, dev, ATA_EH_REVALIDATE);
3088 | |
3089 | /* Configuration may have changed, reconfigure |
3090 | * transfer mode. |
3091 | */ |
3092 | ehc->i.flags |= ATA_EHI_SETMODE; |
3093 | |
3094 | /* schedule the scsi_rescan_device() here */ |
			schedule_delayed_work(&ap->scsi_rescan_task, 0);
3096 | } else if (dev->class == ATA_DEV_UNKNOWN && |
3097 | ehc->tries[dev->devno] && |
			   ata_class_enabled(ehc->classes[dev->devno])) {
3099 | /* Temporarily set dev->class, it will be |
3100 | * permanently set once all configurations are |
3101 | * complete. This is necessary because new |
3102 | * device configuration is done in two |
3103 | * separate loops. |
3104 | */ |
3105 | dev->class = ehc->classes[dev->devno]; |
3106 | |
3107 | if (dev->class == ATA_DEV_PMP) |
3108 | rc = sata_pmp_attach(dev); |
3109 | else |
				rc = ata_dev_read_id(dev, &dev->class,
						     readid_flags, dev->id);
3112 | |
3113 | /* read_id might have changed class, store and reset */ |
3114 | ehc->classes[dev->devno] = dev->class; |
3115 | dev->class = ATA_DEV_UNKNOWN; |
3116 | |
3117 | switch (rc) { |
3118 | case 0: |
3119 | /* clear error info accumulated during probe */ |
				ata_ering_clear(&dev->ering);
3121 | new_mask |= 1 << dev->devno; |
3122 | break; |
3123 | case -ENOENT: |
3124 | /* IDENTIFY was issued to non-existent |
3125 | * device. No need to reset. Just |
3126 | * thaw and ignore the device. |
3127 | */ |
3128 | ata_eh_thaw_port(ap); |
3129 | break; |
3130 | default: |
3131 | goto err; |
3132 | } |
3133 | } |
3134 | } |
3135 | |
3136 | /* PDIAG- should have been released, ask cable type if post-reset */ |
3137 | if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { |
3138 | if (ap->ops->cable_detect) |
3139 | ap->cbl = ap->ops->cable_detect(ap); |
3140 | ata_force_cbl(ap); |
3141 | } |
3142 | |
3143 | /* Configure new devices forward such that user doesn't see |
3144 | * device detection messages backwards. |
3145 | */ |
3146 | ata_for_each_dev(dev, link, ALL) { |
3147 | if (!(new_mask & (1 << dev->devno))) |
3148 | continue; |
3149 | |
3150 | dev->class = ehc->classes[dev->devno]; |
3151 | |
3152 | if (dev->class == ATA_DEV_PMP) |
3153 | continue; |
3154 | |
3155 | ehc->i.flags |= ATA_EHI_PRINTINFO; |
3156 | rc = ata_dev_configure(dev); |
3157 | ehc->i.flags &= ~ATA_EHI_PRINTINFO; |
3158 | if (rc) { |
3159 | dev->class = ATA_DEV_UNKNOWN; |
3160 | goto err; |
3161 | } |
3162 | |
3163 | spin_lock_irqsave(ap->lock, flags); |
3164 | ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; |
		spin_unlock_irqrestore(ap->lock, flags);
3166 | |
3167 | /* new device discovered, configure xfermode */ |
3168 | ehc->i.flags |= ATA_EHI_SETMODE; |
3169 | } |
3170 | |
3171 | return 0; |
3172 | |
3173 | err: |
3174 | dev->flags &= ~ATA_DFLAG_RESUMING; |
3175 | *r_failed_dev = dev; |
3176 | return rc; |
3177 | } |
3178 | |
3179 | /** |
3180 | * ata_set_mode - Program timings and issue SET FEATURES - XFER |
3181 | * @link: link on which timings will be programmed |
3182 | * @r_failed_dev: out parameter for failed device |
3183 | * |
3184 | * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If |
3185 | * ata_set_mode() fails, pointer to the failing device is |
3186 | * returned in @r_failed_dev. |
3187 | * |
3188 | * LOCKING: |
3189 | * PCI/etc. bus probe sem. |
3190 | * |
3191 | * RETURNS: |
3192 | * 0 on success, negative errno otherwise |
3193 | */ |
3194 | int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) |
3195 | { |
3196 | struct ata_port *ap = link->ap; |
3197 | struct ata_device *dev; |
3198 | int rc; |
3199 | |
3200 | /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ |
3201 | ata_for_each_dev(dev, link, ENABLED) { |
3202 | if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { |
3203 | struct ata_ering_entry *ent; |
3204 | |
			ent = ata_ering_top(&dev->ering);
3206 | if (ent) |
3207 | ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; |
3208 | } |
3209 | } |
3210 | |
3211 | /* has private set_mode? */ |
3212 | if (ap->ops->set_mode) |
3213 | rc = ap->ops->set_mode(link, r_failed_dev); |
3214 | else |
3215 | rc = ata_do_set_mode(link, r_failed_dev); |
3216 | |
3217 | /* if transfer mode has changed, set DUBIOUS_XFER on device */ |
3218 | ata_for_each_dev(dev, link, ENABLED) { |
3219 | struct ata_eh_context *ehc = &link->eh_context; |
3220 | u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; |
3221 | u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); |
3222 | |
3223 | if (dev->xfer_mode != saved_xfer_mode || |
3224 | ata_ncq_enabled(dev) != saved_ncq) |
3225 | dev->flags |= ATA_DFLAG_DUBIOUS_XFER; |
3226 | } |
3227 | |
3228 | return rc; |
3229 | } |
3230 | |
3231 | /** |
3232 | * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset |
3233 | * @dev: ATAPI device to clear UA for |
3234 | * |
3235 | * Resets and other operations can make an ATAPI device raise |
3236 | * UNIT ATTENTION which causes the next operation to fail. This |
3237 | * function clears UA. |
3238 | * |
3239 | * LOCKING: |
3240 | * EH context (may sleep). |
3241 | * |
3242 | * RETURNS: |
3243 | * 0 on success, -errno on failure. |
3244 | */ |
3245 | static int atapi_eh_clear_ua(struct ata_device *dev) |
3246 | { |
3247 | int i; |
3248 | |
3249 | for (i = 0; i < ATA_EH_UA_TRIES; i++) { |
3250 | u8 *sense_buffer = dev->link->ap->sector_buf; |
3251 | u8 sense_key = 0; |
3252 | unsigned int err_mask; |
3253 | |
		err_mask = atapi_eh_tur(dev, &sense_key);
		if (err_mask != 0 && err_mask != AC_ERR_DEV) {
			ata_dev_warn(dev,
				     "TEST_UNIT_READY failed (err_mask=0x%x)\n",
				     err_mask);
3259 | return -EIO; |
3260 | } |
3261 | |
3262 | if (!err_mask || sense_key != UNIT_ATTENTION) |
3263 | return 0; |
3264 | |
		err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key);
		if (err_mask) {
			ata_dev_warn(dev, "failed to clear "
				"UNIT ATTENTION (err_mask=0x%x)\n", err_mask);
3269 | return -EIO; |
3270 | } |
3271 | } |
3272 | |
3273 | ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n" , |
3274 | ATA_EH_UA_TRIES); |
3275 | |
3276 | return 0; |
3277 | } |
3278 | |
3279 | /** |
3280 | * ata_eh_maybe_retry_flush - Retry FLUSH if necessary |
3281 | * @dev: ATA device which may need FLUSH retry |
3282 | * |
 * If @dev failed FLUSH, it needs to be reported to the upper layer
 * immediately, as it means that @dev failed to remap and has
 * already lost at least a sector; further FLUSH retries won't make
 * any difference to the lost sector. However, if FLUSH failed
 * for other reasons, for example a transmission error, FLUSH
 * needs to be retried.
3289 | * |
3290 | * This function determines whether FLUSH failure retry is |
3291 | * necessary and performs it if so. |
3292 | * |
3293 | * RETURNS: |
3294 | * 0 if EH can continue, -errno if EH needs to be repeated. |
3295 | */ |
3296 | static int ata_eh_maybe_retry_flush(struct ata_device *dev) |
3297 | { |
3298 | struct ata_link *link = dev->link; |
3299 | struct ata_port *ap = link->ap; |
3300 | struct ata_queued_cmd *qc; |
3301 | struct ata_taskfile tf; |
3302 | unsigned int err_mask; |
3303 | int rc = 0; |
3304 | |
3305 | /* did flush fail for this device? */ |
	if (!ata_tag_valid(link->active_tag))
3307 | return 0; |
3308 | |
	qc = __ata_qc_from_tag(ap, link->active_tag);
3310 | if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT && |
3311 | qc->tf.command != ATA_CMD_FLUSH)) |
3312 | return 0; |
3313 | |
3314 | /* if the device failed it, it should be reported to upper layers */ |
3315 | if (qc->err_mask & AC_ERR_DEV) |
3316 | return 0; |
3317 | |
3318 | /* flush failed for some other reason, give it another shot */ |
	ata_tf_init(dev, &tf);
3320 | |
3321 | tf.command = qc->tf.command; |
3322 | tf.flags |= ATA_TFLAG_DEVICE; |
3323 | tf.protocol = ATA_PROT_NODATA; |
3324 | |
3325 | ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n" , |
3326 | tf.command, qc->err_mask); |
3327 | |
	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
3329 | if (!err_mask) { |
3330 | /* |
3331 | * FLUSH is complete but there's no way to |
3332 | * successfully complete a failed command from EH. |
3333 | * Making sure retry is allowed at least once and |
3334 | * retrying it should do the trick - whatever was in |
3335 | * the cache is already on the platter and this won't |
		 * cause an infinite loop.
3337 | */ |
3338 | qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1); |
3339 | } else { |
3340 | ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n" , |
3341 | err_mask); |
3342 | rc = -EIO; |
3343 | |
3344 | /* if device failed it, report it to upper layers */ |
3345 | if (err_mask & AC_ERR_DEV) { |
3346 | qc->err_mask |= AC_ERR_DEV; |
3347 | qc->result_tf = tf; |
3348 | if (!ata_port_is_frozen(ap)) |
3349 | rc = 0; |
3350 | } |
3351 | } |
3352 | return rc; |
3353 | } |
3354 | |
3355 | /** |
3356 | * ata_eh_set_lpm - configure SATA interface power management |
3357 | * @link: link to configure power management |
3358 | * @policy: the link power management policy |
3359 | * @r_failed_dev: out parameter for failed device |
3360 | * |
 * Enable SATA Interface power management. This will enable
 * Device Interface Power Management (DIPM) for the min_power and
 * medium_power_with_dipm policies, and then call driver-specific
 * callbacks for enabling Host Initiated Power Management (HIPM).
3365 | * |
3366 | * LOCKING: |
3367 | * EH context. |
3368 | * |
3369 | * RETURNS: |
3370 | * 0 on success, -errno on failure. |
3371 | */ |
3372 | static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, |
3373 | struct ata_device **r_failed_dev) |
3374 | { |
3375 | struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL; |
3376 | struct ata_eh_context *ehc = &link->eh_context; |
3377 | struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; |
3378 | enum ata_lpm_policy old_policy = link->lpm_policy; |
3379 | bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM; |
3380 | unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; |
3381 | unsigned int err_mask; |
3382 | int rc; |
3383 | |
3384 | /* if the link or host doesn't do LPM, noop */ |
3385 | if (!IS_ENABLED(CONFIG_SATA_HOST) || |
3386 | (link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm)) |
3387 | return 0; |
3388 | |
3389 | /* |
	 * DIPM is enabled only for the medium_power_with_dipm and
	 * min_power policies, as some devices misbehave when the host
	 * NACKs transition to SLUMBER. Order device and link
	 * configurations such that the host always allows DIPM
	 * requests.
3394 | */ |
3395 | ata_for_each_dev(dev, link, ENABLED) { |
		bool hipm = ata_id_has_hipm(dev->id);
3397 | bool dipm = ata_id_has_dipm(dev->id) && !no_dipm; |
3398 | |
		/* find the first enabled device and the first LPM-enabled device */
3400 | if (!link_dev) |
3401 | link_dev = dev; |
3402 | |
3403 | if (!lpm_dev && (hipm || dipm)) |
3404 | lpm_dev = dev; |
3405 | |
3406 | hints &= ~ATA_LPM_EMPTY; |
3407 | if (!hipm) |
3408 | hints &= ~ATA_LPM_HIPM; |
3409 | |
3410 | /* disable DIPM before changing link config */ |
3411 | if (policy < ATA_LPM_MED_POWER_WITH_DIPM && dipm) { |
			err_mask = ata_dev_set_feature(dev,
					SETFEATURES_SATA_DISABLE, SATA_DIPM);
3414 | if (err_mask && err_mask != AC_ERR_DEV) { |
3415 | ata_dev_warn(dev, |
3416 | "failed to disable DIPM, Emask 0x%x\n" , |
3417 | err_mask); |
3418 | rc = -EIO; |
3419 | goto fail; |
3420 | } |
3421 | } |
3422 | } |
3423 | |
3424 | if (ap) { |
3425 | rc = ap->ops->set_lpm(link, policy, hints); |
3426 | if (!rc && ap->slave_link) |
3427 | rc = ap->ops->set_lpm(ap->slave_link, policy, hints); |
3428 | } else |
3429 | rc = sata_pmp_set_lpm(link, policy, hints); |
3430 | |
3431 | /* |
3432 | * Attribute link config failure to the first (LPM) enabled |
3433 | * device on the link. |
3434 | */ |
3435 | if (rc) { |
3436 | if (rc == -EOPNOTSUPP) { |
3437 | link->flags |= ATA_LFLAG_NO_LPM; |
3438 | return 0; |
3439 | } |
3440 | dev = lpm_dev ? lpm_dev : link_dev; |
3441 | goto fail; |
3442 | } |
3443 | |
3444 | /* |
3445 | * Low level driver acked the transition. Issue DIPM command |
3446 | * with the new policy set. |
3447 | */ |
3448 | link->lpm_policy = policy; |
3449 | if (ap && ap->slave_link) |
3450 | ap->slave_link->lpm_policy = policy; |
3451 | |
	/* host config updated, enable DIPM if the new policy allows it */
3453 | ata_for_each_dev(dev, link, ENABLED) { |
3454 | if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && !no_dipm && |
3455 | ata_id_has_dipm(dev->id)) { |
			err_mask = ata_dev_set_feature(dev,
					SETFEATURES_SATA_ENABLE, SATA_DIPM);
3458 | if (err_mask && err_mask != AC_ERR_DEV) { |
3459 | ata_dev_warn(dev, |
3460 | "failed to enable DIPM, Emask 0x%x\n" , |
3461 | err_mask); |
3462 | rc = -EIO; |
3463 | goto fail; |
3464 | } |
3465 | } |
3466 | } |
3467 | |
3468 | link->last_lpm_change = jiffies; |
3469 | link->flags |= ATA_LFLAG_CHANGED; |
3470 | |
3471 | return 0; |
3472 | |
3473 | fail: |
3474 | /* restore the old policy */ |
3475 | link->lpm_policy = old_policy; |
3476 | if (ap && ap->slave_link) |
3477 | ap->slave_link->lpm_policy = old_policy; |
3478 | |
3479 | /* if no device or only one more chance is left, disable LPM */ |
3480 | if (!dev || ehc->tries[dev->devno] <= 2) { |
3481 | ata_link_warn(link, "disabling LPM on the link\n" ); |
3482 | link->flags |= ATA_LFLAG_NO_LPM; |
3483 | } |
3484 | if (r_failed_dev) |
3485 | *r_failed_dev = dev; |
3486 | return rc; |
3487 | } |
3488 | |
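/**
 * ata_link_nr_enabled - count enabled devices on a link
 * @link: link to count enabled devices on
 *
 * RETURNS:
 * Number of enabled devices on @link.
 */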
3489 | int ata_link_nr_enabled(struct ata_link *link) |
3490 | { |
3491 | struct ata_device *dev; |
3492 | int cnt = 0; |
3493 | |
3494 | ata_for_each_dev(dev, link, ENABLED) |
3495 | cnt++; |
3496 | return cnt; |
3497 | } |
3498 | |
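/**
 * ata_link_nr_vacant - count vacant device slots on a link
 * @link: link to examine
 *
 * RETURNS:
 * Number of device slots on @link whose class is still
 * ATA_DEV_UNKNOWN.
 */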
3499 | static int ata_link_nr_vacant(struct ata_link *link) |
3500 | { |
3501 | struct ata_device *dev; |
3502 | int cnt = 0; |
3503 | |
3504 | ata_for_each_dev(dev, link, ALL) |
3505 | if (dev->class == ATA_DEV_UNKNOWN) |
3506 | cnt++; |
3507 | return cnt; |
3508 | } |
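/**
 * ata_eh_skip_recovery - determine whether EH recovery can be skipped
 * @link: link to check
 *
 * Recovery can be skipped if the link is disabled or skipping is
 * explicitly requested, or if the port is not frozen, no device on
 * the link is enabled, no requested reset is still pending, and the
 * expected class for every vacant slot is ATA_DEV_NONE.
 *
 * RETURNS:
 * 1 if recovery for @link can be skipped, 0 otherwise.
 */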
3509 | |
3510 | static int ata_eh_skip_recovery(struct ata_link *link) |
3511 | { |
3512 | struct ata_port *ap = link->ap; |
3513 | struct ata_eh_context *ehc = &link->eh_context; |
3514 | struct ata_device *dev; |
3515 | |
3516 | /* skip disabled links */ |
3517 | if (link->flags & ATA_LFLAG_DISABLED) |
3518 | return 1; |
3519 | |
3520 | /* skip if explicitly requested */ |
3521 | if (ehc->i.flags & ATA_EHI_NO_RECOVERY) |
3522 | return 1; |
3523 | |
3524 | /* thaw frozen port and recover failed devices */ |
3525 | if (ata_port_is_frozen(ap) || ata_link_nr_enabled(link)) |
3526 | return 0; |
3527 | |
3528 | /* reset at least once if reset is requested */ |
3529 | if ((ehc->i.action & ATA_EH_RESET) && |
3530 | !(ehc->i.flags & ATA_EHI_DID_RESET)) |
3531 | return 0; |
3532 | |
3533 | /* skip if class codes for all vacant slots are ATA_DEV_NONE */ |
3534 | ata_for_each_dev(dev, link, ALL) { |
3535 | if (dev->class == ATA_DEV_UNKNOWN && |
3536 | ehc->classes[dev->devno] != ATA_DEV_NONE) |
3537 | return 0; |
3538 | } |
3539 | |
3540 | return 1; |
3541 | } |
3542 | |
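/*
 * ata_ering_map() callback used by ata_eh_schedule_probe(): count
 * probe trials recorded within the last ATA_EH_PROBE_TRIAL_INTERVAL.
 * Mapping stops at the first entry that is marked stale
 * (ATA_EFLAG_OLD_ER) or is older than the trial interval.
 */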
3543 | static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg) |
3544 | { |
	u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL);
3546 | u64 now = get_jiffies_64(); |
3547 | int *trials = void_arg; |
3548 | |
3549 | if ((ent->eflags & ATA_EFLAG_OLD_ER) || |
3550 | (ent->timestamp < now - min(now, interval))) |
3551 | return -1; |
3552 | |
3553 | (*trials)++; |
3554 | return 0; |
3555 | } |
3556 | |
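/**
 * ata_eh_schedule_probe - schedule probing for a device
 * @dev: device to schedule probing for
 *
 * If probing is requested for @dev and hasn't been performed yet,
 * detach and re-initialize the device, schedule a reset, wake up
 * the link if it's in a power saving state, and record the probe
 * trial on the error ring.  Too many failed trials within the
 * trial interval force the link speed down to 1.5Gbps.
 *
 * RETURNS:
 * 1 if probing was scheduled, 0 otherwise.
 */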
3557 | static int ata_eh_schedule_probe(struct ata_device *dev) |
3558 | { |
3559 | struct ata_eh_context *ehc = &dev->link->eh_context; |
3560 | struct ata_link *link = ata_dev_phys_link(dev); |
3561 | int trials = 0; |
3562 | |
3563 | if (!(ehc->i.probe_mask & (1 << dev->devno)) || |
3564 | (ehc->did_probe_mask & (1 << dev->devno))) |
3565 | return 0; |
3566 | |
3567 | ata_eh_detach_dev(dev); |
3568 | ata_dev_init(dev); |
3569 | ehc->did_probe_mask |= (1 << dev->devno); |
3570 | ehc->i.action |= ATA_EH_RESET; |
3571 | ehc->saved_xfer_mode[dev->devno] = 0; |
3572 | ehc->saved_ncq_enabled &= ~(1 << dev->devno); |
3573 | |
	/* the link may be in deep sleep; wake it up */
3575 | if (link->lpm_policy > ATA_LPM_MAX_POWER) { |
3576 | if (ata_is_host_link(link)) |
3577 | link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER, |
3578 | ATA_LPM_EMPTY); |
3579 | else |
			sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER,
					 ATA_LPM_EMPTY);
3582 | } |
3583 | |
3584 | /* Record and count probe trials on the ering. The specific |
3585 | * error mask used is irrelevant. Because a successful device |
3586 | * detection clears the ering, this count accumulates only if |
3587 | * there are consecutive failed probes. |
3588 | * |
3589 | * If the count is equal to or higher than ATA_EH_PROBE_TRIALS |
3590 | * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is |
3591 | * forced to 1.5Gbps. |
3592 | * |
3593 | * This is to work around cases where failed link speed |
3594 | * negotiation results in device misdetection leading to |
3595 | * infinite DEVXCHG or PHRDY CHG events. |
3596 | */ |
	ata_ering_record(&dev->ering, 0, AC_ERR_OTHER);
	ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials);
3599 | |
3600 | if (trials > ATA_EH_PROBE_TRIALS) |
		sata_down_spd_limit(link, 1);
3602 | |
3603 | return 1; |
3604 | } |
3605 | |
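/**
 * ata_eh_handle_dev_fail - handle a device that failed EH recovery
 * @dev: device that failed recovery
 * @err: error code from the failed recovery step
 *
 * Consume one recovery try (unless @err is -EAGAIN), slow the
 * device down on its last try, and disable it once all tries are
 * used up, scheduling a fresh probe if appropriate.
 *
 * RETURNS:
 * 1 if the device was disabled, 0 if recovery should be retried
 * with a reset.
 */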
3606 | static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) |
3607 | { |
3608 | struct ata_eh_context *ehc = &dev->link->eh_context; |
3609 | |
3610 | /* -EAGAIN from EH routine indicates retry without prejudice. |
3611 | * The requester is responsible for ensuring forward progress. |
3612 | */ |
3613 | if (err != -EAGAIN) |
3614 | ehc->tries[dev->devno]--; |
3615 | |
3616 | switch (err) { |
3617 | case -ENODEV: |
3618 | /* device missing or wrong IDENTIFY data, schedule probing */ |
3619 | ehc->i.probe_mask |= (1 << dev->devno); |
3620 | fallthrough; |
3621 | case -EINVAL: |
3622 | /* give it just one more chance */ |
3623 | ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); |
3624 | fallthrough; |
3625 | case -EIO: |
3626 | if (ehc->tries[dev->devno] == 1) { |
3627 | /* This is the last chance, better to slow |
3628 | * down than lose it. |
3629 | */ |
			sata_down_spd_limit(ata_dev_phys_link(dev), 0);
3631 | if (dev->pio_mode > XFER_PIO_0) |
				ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
3633 | } |
3634 | } |
3635 | |
3636 | if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { |
3637 | /* disable device if it has used up all its chances */ |
3638 | ata_dev_disable(dev); |
3639 | |
3640 | /* detach if offline */ |
		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
3642 | ata_eh_detach_dev(dev); |
3643 | |
3644 | /* schedule probe if necessary */ |
3645 | if (ata_eh_schedule_probe(dev)) { |
3646 | ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; |
3647 | memset(ehc->cmd_timeout_idx[dev->devno], 0, |
3648 | sizeof(ehc->cmd_timeout_idx[dev->devno])); |
3649 | } |
3650 | |
3651 | return 1; |
3652 | } else { |
3653 | ehc->i.action |= ATA_EH_RESET; |
3654 | return 0; |
3655 | } |
3656 | } |
3657 | |
3658 | /** |
3659 | * ata_eh_recover - recover host port after error |
3660 | * @ap: host port to recover |
3661 | * @prereset: prereset method (can be NULL) |
3662 | * @softreset: softreset method (can be NULL) |
3663 | * @hardreset: hardreset method (can be NULL) |
3664 | * @postreset: postreset method (can be NULL) |
3665 | * @r_failed_link: out parameter for failed link |
3666 | * |
 * This is the alpha and omega, yin and yang, heart and soul of
 * libata exception handling. On entry, actions required to
 * recover each link and hotplug requests are recorded in the
 * link's eh_context. This function executes all the operations
 * with appropriate retries and fallbacks to resurrect failed
 * devices, detach goners and greet newcomers.
3673 | * |
3674 | * LOCKING: |
3675 | * Kernel thread context (may sleep). |
3676 | * |
3677 | * RETURNS: |
3678 | * 0 on success, -errno on failure. |
3679 | */ |
3680 | int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, |
3681 | ata_reset_fn_t softreset, ata_reset_fn_t hardreset, |
3682 | ata_postreset_fn_t postreset, |
3683 | struct ata_link **r_failed_link) |
3684 | { |
3685 | struct ata_link *link; |
3686 | struct ata_device *dev; |
3687 | int rc, nr_fails; |
3688 | unsigned long flags, deadline; |
3689 | |
3690 | /* prep for recovery */ |
3691 | ata_for_each_link(link, ap, EDGE) { |
3692 | struct ata_eh_context *ehc = &link->eh_context; |
3693 | |
3694 | /* re-enable link? */ |
3695 | if (ehc->i.action & ATA_EH_ENABLE_LINK) { |
			ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK);
			spin_lock_irqsave(ap->lock, flags);
			link->flags &= ~ATA_LFLAG_DISABLED;
			spin_unlock_irqrestore(ap->lock, flags);
			ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
3701 | } |
3702 | |
3703 | ata_for_each_dev(dev, link, ALL) { |
3704 | if (link->flags & ATA_LFLAG_NO_RETRY) |
3705 | ehc->tries[dev->devno] = 1; |
3706 | else |
3707 | ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; |
3708 | |
3709 | /* collect port action mask recorded in dev actions */ |
3710 | ehc->i.action |= ehc->i.dev_action[dev->devno] & |
3711 | ~ATA_EH_PERDEV_MASK; |
3712 | ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; |
3713 | |
3714 | /* process hotplug request */ |
3715 | if (dev->flags & ATA_DFLAG_DETACH) |
3716 | ata_eh_detach_dev(dev); |
3717 | |
3718 | /* schedule probe if necessary */ |
3719 | if (!ata_dev_enabled(dev)) |
3720 | ata_eh_schedule_probe(dev); |
3721 | } |
3722 | } |
3723 | |
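	/*
	 * Recovery is retried from this point whenever it fails for
	 * any link and the port can still make progress.
	 */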
3724 | retry: |
3725 | rc = 0; |
3726 | |
3727 | /* if UNLOADING, finish immediately */ |
3728 | if (ap->pflags & ATA_PFLAG_UNLOADING) |
3729 | goto out; |
3730 | |
3731 | /* prep for EH */ |
3732 | ata_for_each_link(link, ap, EDGE) { |
3733 | struct ata_eh_context *ehc = &link->eh_context; |
3734 | |
3735 | /* skip EH if possible. */ |
3736 | if (ata_eh_skip_recovery(link)) |
3737 | ehc->i.action = 0; |
3738 | |
3739 | ata_for_each_dev(dev, link, ALL) |
3740 | ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; |
3741 | } |
3742 | |
3743 | /* reset */ |
3744 | ata_for_each_link(link, ap, EDGE) { |
3745 | struct ata_eh_context *ehc = &link->eh_context; |
3746 | |
3747 | if (!(ehc->i.action & ATA_EH_RESET)) |
3748 | continue; |
3749 | |
		rc = ata_eh_reset(link, ata_link_nr_vacant(link),
3751 | prereset, softreset, hardreset, postreset); |
3752 | if (rc) { |
3753 | ata_link_err(link, "reset failed, giving up\n" ); |
3754 | goto out; |
3755 | } |
3756 | } |
3757 | |
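	/*
	 * Handle head unload (PARK) requests: issue the unload command
	 * for each device with a pending ATA_EH_PARK action, then wait
	 * until every unpark deadline has passed or a new park request
	 * arrives.
	 */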
3758 | do { |
3759 | unsigned long now; |
3760 | |
3761 | /* |
3762 | * clears ATA_EH_PARK in eh_info and resets |
3763 | * ap->park_req_pending |
3764 | */ |
3765 | ata_eh_pull_park_action(ap); |
3766 | |
3767 | deadline = jiffies; |
3768 | ata_for_each_link(link, ap, EDGE) { |
3769 | ata_for_each_dev(dev, link, ALL) { |
3770 | struct ata_eh_context *ehc = &link->eh_context; |
3771 | unsigned long tmp; |
3772 | |
3773 | if (dev->class != ATA_DEV_ATA && |
3774 | dev->class != ATA_DEV_ZAC) |
3775 | continue; |
3776 | if (!(ehc->i.dev_action[dev->devno] & |
3777 | ATA_EH_PARK)) |
3778 | continue; |
3779 | tmp = dev->unpark_deadline; |
3780 | if (time_before(deadline, tmp)) |
3781 | deadline = tmp; |
3782 | else if (time_before_eq(tmp, jiffies)) |
3783 | continue; |
3784 | if (ehc->unloaded_mask & (1 << dev->devno)) |
3785 | continue; |
3786 | |
				ata_eh_park_issue_cmd(dev, 1);
3788 | } |
3789 | } |
3790 | |
3791 | now = jiffies; |
3792 | if (time_before_eq(deadline, now)) |
3793 | break; |
3794 | |
3795 | ata_eh_release(ap); |
		deadline = wait_for_completion_timeout(&ap->park_req_pending,
						       deadline - now);
3798 | ata_eh_acquire(ap); |
3799 | } while (deadline); |
3800 | ata_for_each_link(link, ap, EDGE) { |
3801 | ata_for_each_dev(dev, link, ALL) { |
3802 | if (!(link->eh_context.unloaded_mask & |
3803 | (1 << dev->devno))) |
3804 | continue; |
3805 | |
			ata_eh_park_issue_cmd(dev, 0);
			ata_eh_done(link, dev, ATA_EH_PARK);
3808 | } |
3809 | } |
3810 | |
3811 | /* the rest */ |
3812 | nr_fails = 0; |
3813 | ata_for_each_link(link, ap, PMP_FIRST) { |
3814 | struct ata_eh_context *ehc = &link->eh_context; |
3815 | |
3816 | if (sata_pmp_attached(ap) && ata_is_host_link(link)) |
3817 | goto config_lpm; |
3818 | |
3819 | /* revalidate existing devices and attach new ones */ |
		rc = ata_eh_revalidate_and_attach(link, &dev);
3821 | if (rc) |
3822 | goto rest_fail; |
3823 | |
		/* if PMP got attached, return; PMP EH will take care of it */
3825 | if (link->device->class == ATA_DEV_PMP) { |
3826 | ehc->i.action = 0; |
3827 | return 0; |
3828 | } |
3829 | |
3830 | /* configure transfer mode if necessary */ |
3831 | if (ehc->i.flags & ATA_EHI_SETMODE) { |
			rc = ata_set_mode(link, &dev);
3833 | if (rc) |
3834 | goto rest_fail; |
3835 | ehc->i.flags &= ~ATA_EHI_SETMODE; |
3836 | } |
3837 | |
3838 | /* If reset has been issued, clear UA to avoid |
3839 | * disrupting the current users of the device. |
3840 | */ |
3841 | if (ehc->i.flags & ATA_EHI_DID_RESET) { |
3842 | ata_for_each_dev(dev, link, ALL) { |
3843 | if (dev->class != ATA_DEV_ATAPI) |
3844 | continue; |
3845 | rc = atapi_eh_clear_ua(dev); |
3846 | if (rc) |
3847 | goto rest_fail; |
3848 | if (zpodd_dev_enabled(dev)) |
3849 | zpodd_post_poweron(dev); |
3850 | } |
3851 | } |
3852 | |
3853 | /* |
3854 | * Make sure to transition devices to the active power mode |
3855 | * if needed (e.g. if we were scheduled on system resume). |
3856 | */ |
3857 | ata_for_each_dev(dev, link, ENABLED) { |
3858 | if (ehc->i.dev_action[dev->devno] & ATA_EH_SET_ACTIVE) { |
3859 | ata_dev_power_set_active(dev); |
				ata_eh_done(link, dev, ATA_EH_SET_ACTIVE);
3861 | } |
3862 | } |
3863 | |
3864 | /* retry flush if necessary */ |
3865 | ata_for_each_dev(dev, link, ALL) { |
3866 | if (dev->class != ATA_DEV_ATA && |
3867 | dev->class != ATA_DEV_ZAC) |
3868 | continue; |
3869 | rc = ata_eh_maybe_retry_flush(dev); |
3870 | if (rc) |
3871 | goto rest_fail; |
3872 | } |
3873 | |
3874 | config_lpm: |
3875 | /* configure link power saving */ |
3876 | if (link->lpm_policy != ap->target_lpm_policy) { |
			rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev);
3878 | if (rc) |
3879 | goto rest_fail; |
3880 | } |
3881 | |
3882 | /* this link is okay now */ |
3883 | ehc->i.flags = 0; |
3884 | continue; |
3885 | |
3886 | rest_fail: |
3887 | nr_fails++; |
3888 | if (dev) |
			ata_eh_handle_dev_fail(dev, rc);
3890 | |
3891 | if (ata_port_is_frozen(ap)) { |
3892 | /* PMP reset requires working host port. |
3893 | * Can't retry if it's frozen. |
3894 | */ |
3895 | if (sata_pmp_attached(ap)) |
3896 | goto out; |
3897 | break; |
3898 | } |
3899 | } |
3900 | |
3901 | if (nr_fails) |
3902 | goto retry; |
3903 | |
3904 | out: |
3905 | if (rc && r_failed_link) |
3906 | *r_failed_link = link; |
3907 | |
3908 | return rc; |
3909 | } |
3910 | |
3911 | /** |
3912 | * ata_eh_finish - finish up EH |
3913 | * @ap: host port to finish EH for |
3914 | * |
3915 | * Recovery is complete. Clean up EH states and retry or finish |
3916 | * failed qcs. |
3917 | * |
3918 | * LOCKING: |
3919 | * None. |
3920 | */ |
3921 | void ata_eh_finish(struct ata_port *ap) |
3922 | { |
3923 | struct ata_queued_cmd *qc; |
3924 | int tag; |
3925 | |
3926 | /* retry or finish qcs */ |
3927 | ata_qc_for_each_raw(ap, qc, tag) { |
3928 | if (!(qc->flags & ATA_QCFLAG_EH)) |
3929 | continue; |
3930 | |
3931 | if (qc->err_mask) { |
3932 | /* FIXME: Once EH migration is complete, |
3933 | * generate sense data in this function, |
3934 | * considering both err_mask and tf. |
3935 | */ |
3936 | if (qc->flags & ATA_QCFLAG_RETRY) { |
3937 | /* |
3938 | * Since qc->err_mask is set, ata_eh_qc_retry() |
3939 | * will not increment scmd->allowed, so upper |
3940 | * layer will only retry the command if it has |
3941 | * not already been retried too many times. |
3942 | */ |
3943 | ata_eh_qc_retry(qc); |
3944 | } else { |
3945 | ata_eh_qc_complete(qc); |
3946 | } |
3947 | } else { |
3948 | if (qc->flags & ATA_QCFLAG_SENSE_VALID || |
3949 | qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) { |
3950 | ata_eh_qc_complete(qc); |
3951 | } else { |
3952 | /* feed zero TF to sense generation */ |
3953 | memset(&qc->result_tf, 0, sizeof(qc->result_tf)); |
3954 | /* |
3955 | * Since qc->err_mask is not set, |
3956 | * ata_eh_qc_retry() will increment |
3957 | * scmd->allowed, so upper layer is guaranteed |
3958 | * to retry the command. |
3959 | */ |
3960 | ata_eh_qc_retry(qc); |
3961 | } |
3962 | } |
3963 | } |
3964 | |
3965 | /* make sure nr_active_links is zero after EH */ |
3966 | WARN_ON(ap->nr_active_links); |
3967 | ap->nr_active_links = 0; |
3968 | } |
3969 | |
3970 | /** |
3971 | * ata_do_eh - do standard error handling |
3972 | * @ap: host port to handle error for |
3973 | * |
3974 | * @prereset: prereset method (can be NULL) |
3975 | * @softreset: softreset method (can be NULL) |
3976 | * @hardreset: hardreset method (can be NULL) |
3977 | * @postreset: postreset method (can be NULL) |
3978 | * |
3979 | * Perform standard error handling sequence. |
3980 | * |
3981 | * LOCKING: |
3982 | * Kernel thread context (may sleep). |
3983 | */ |
3984 | void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, |
3985 | ata_reset_fn_t softreset, ata_reset_fn_t hardreset, |
3986 | ata_postreset_fn_t postreset) |
3987 | { |
3988 | struct ata_device *dev; |
3989 | int rc; |
3990 | |
3991 | ata_eh_autopsy(ap); |
3992 | ata_eh_report(ap); |
3993 | |
3994 | rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, |
3995 | NULL); |
3996 | if (rc) { |
3997 | ata_for_each_dev(dev, &ap->link, ALL) |
3998 | ata_dev_disable(dev); |
3999 | } |
4000 | |
4001 | ata_eh_finish(ap); |
4002 | } |
4003 | |
4004 | /** |
4005 | * ata_std_error_handler - standard error handler |
4006 | * @ap: host port to handle error for |
4007 | * |
4008 | * Standard error handler |
4009 | * |
4010 | * LOCKING: |
4011 | * Kernel thread context (may sleep). |
4012 | */ |
4013 | void ata_std_error_handler(struct ata_port *ap) |
4014 | { |
4015 | struct ata_port_operations *ops = ap->ops; |
4016 | ata_reset_fn_t hardreset = ops->hardreset; |
4017 | |
4018 | /* ignore built-in hardreset if SCR access is not available */ |
	if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link))
4020 | hardreset = NULL; |
4021 | |
	ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
4023 | } |
4024 | EXPORT_SYMBOL_GPL(ata_std_error_handler); |
4025 | |
4026 | #ifdef CONFIG_PM |
4027 | /** |
4028 | * ata_eh_handle_port_suspend - perform port suspend operation |
4029 | * @ap: port to suspend |
4030 | * |
4031 | * Suspend @ap. |
4032 | * |
4033 | * LOCKING: |
4034 | * Kernel thread context (may sleep). |
4035 | */ |
4036 | static void ata_eh_handle_port_suspend(struct ata_port *ap) |
4037 | { |
4038 | unsigned long flags; |
4039 | int rc = 0; |
4040 | struct ata_device *dev; |
4041 | struct ata_link *link; |
4042 | |
4043 | /* are we suspending? */ |
4044 | spin_lock_irqsave(ap->lock, flags); |
4045 | if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || |
4046 | ap->pm_mesg.event & PM_EVENT_RESUME) { |
		spin_unlock_irqrestore(ap->lock, flags);
4048 | return; |
4049 | } |
	spin_unlock_irqrestore(ap->lock, flags);
4051 | |
4052 | WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); |
4053 | |
4054 | /* Set all devices attached to the port in standby mode */ |
4055 | ata_for_each_link(link, ap, HOST_FIRST) { |
4056 | ata_for_each_dev(dev, link, ENABLED) |
4057 | ata_dev_power_set_standby(dev); |
4058 | } |
4059 | |
4060 | /* |
4061 | * If we have a ZPODD attached, check its zero |
4062 | * power ready status before the port is frozen. |
4063 | * Only needed for runtime suspend. |
4064 | */ |
4065 | if (PMSG_IS_AUTO(ap->pm_mesg)) { |
4066 | ata_for_each_dev(dev, &ap->link, ENABLED) { |
4067 | if (zpodd_dev_enabled(dev)) |
4068 | zpodd_on_suspend(dev); |
4069 | } |
4070 | } |
4071 | |
4072 | /* suspend */ |
4073 | ata_eh_freeze_port(ap); |
4074 | |
4075 | if (ap->ops->port_suspend) |
4076 | rc = ap->ops->port_suspend(ap, ap->pm_mesg); |
4077 | |
	ata_acpi_set_state(ap, ap->pm_mesg);
4079 | |
4080 | /* update the flags */ |
4081 | spin_lock_irqsave(ap->lock, flags); |
4082 | |
4083 | ap->pflags &= ~ATA_PFLAG_PM_PENDING; |
4084 | if (rc == 0) |
4085 | ap->pflags |= ATA_PFLAG_SUSPENDED; |
4086 | else if (ata_port_is_frozen(ap)) |
4087 | ata_port_schedule_eh(ap); |
4088 | |
	spin_unlock_irqrestore(ap->lock, flags);
4090 | |
4091 | return; |
4092 | } |
4093 | |
4094 | /** |
4095 | * ata_eh_handle_port_resume - perform port resume operation |
4096 | * @ap: port to resume |
4097 | * |
4098 | * Resume @ap. |
4099 | * |
4100 | * LOCKING: |
4101 | * Kernel thread context (may sleep). |
4102 | */ |
4103 | static void ata_eh_handle_port_resume(struct ata_port *ap) |
4104 | { |
4105 | struct ata_link *link; |
4106 | struct ata_device *dev; |
4107 | unsigned long flags; |
4108 | |
4109 | /* are we resuming? */ |
4110 | spin_lock_irqsave(ap->lock, flags); |
4111 | if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || |
4112 | !(ap->pm_mesg.event & PM_EVENT_RESUME)) { |
		spin_unlock_irqrestore(ap->lock, flags);
4114 | return; |
4115 | } |
	spin_unlock_irqrestore(ap->lock, flags);
4117 | |
4118 | WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); |
4119 | |
4120 | /* |
4121 | * Error timestamps are in jiffies which doesn't run while |
	 * suspended and PHY events during resume aren't too uncommon.
4123 | * When the two are combined, it can lead to unnecessary speed |
4124 | * downs if the machine is suspended and resumed repeatedly. |
4125 | * Clear error history. |
4126 | */ |
4127 | ata_for_each_link(link, ap, HOST_FIRST) |
4128 | ata_for_each_dev(dev, link, ALL) |
			ata_ering_clear(&dev->ering);
4130 | |
	ata_acpi_set_state(ap, ap->pm_mesg);
4132 | |
4133 | if (ap->ops->port_resume) |
4134 | ap->ops->port_resume(ap); |
4135 | |
4136 | /* tell ACPI that we're resuming */ |
4137 | ata_acpi_on_resume(ap); |
4138 | |
4139 | /* update the flags */ |
4140 | spin_lock_irqsave(ap->lock, flags); |
4141 | ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); |
4142 | ap->pflags |= ATA_PFLAG_RESUMING; |
	spin_unlock_irqrestore(ap->lock, flags);
4144 | } |
4145 | #endif /* CONFIG_PM */ |
4146 | |