/* SPDX-License-Identifier: GPL-2.0-only */
/*
   drbd_req.h

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2006-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2006-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
   Copyright (C) 2006-2008, Philipp Reisner <philipp.reisner@linbit.com>.

 */

#ifndef _DRBD_REQ_H
#define _DRBD_REQ_H

#include <linux/module.h>

#include <linux/slab.h>
#include <linux/drbd.h>
#include "drbd_int.h"

/* The request callbacks will be called in irq context by the IDE drivers,
   and in Softirqs/Tasklets/BH context by the SCSI drivers,
   and by the receiver and worker in kernel-thread context.
   Try to get the locking right :) */

/*
 * Objects of type struct drbd_request only exist on an R_PRIMARY node, and are
 * associated with IO requests originating from the block layer above us.
 *
 * There are quite a few things that may happen to a drbd request
 * during its lifetime.
 *
 *  It will be created.
 *  It will be marked with the intention to be
 *	submitted to local disk and/or
 *	sent via the network.
 *
 *  It has to be placed on the transfer log and other housekeeping lists,
 *	in case we have a network connection.
 *
 *  It may be identified as a concurrent (write) request
 *	and be handled accordingly.
 *
 *  It may be handed over to the local disk subsystem.
 *  It may be completed by the local disk subsystem,
 *	either successfully or with io-error.
 *  In case it is a READ request, and it failed locally,
 *	it may be retried remotely.
 *
 *  It may be queued for sending.
 *  It may be handed over to the network stack,
 *	which may fail.
 *  It may be acknowledged by the "peer" according to the wire_protocol in use.
 *	This may be a negative ack.
 *  It may receive a faked ack when the network connection is lost and the
 *	transfer log is cleaned up.
 *  Sending may be canceled due to network connection loss.
 *  When it finally has outlived its time,
 *	corresponding dirty bits in the resync-bitmap may be cleared or set,
 *	it will be destroyed,
 *	and completion will be signalled to the originator,
 *	with or without "success".
 */
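
/*
 * A rough sketch of how those steps are driven (illustrative only, details
 * simplified from drbd_req.c): each step in a request's lifetime is reported
 * to the request state machine as one of the drbd_req_event values below,
 * via __req_mod()/req_mod() declared further down in this header.  E.g. once
 * the sender has pushed the payload onto the socket, it would do roughly
 *
 *	req_mod(req, HANDED_OVER_TO_NETWORK, peer_device);
 *
 * and the state machine decides whether the master_bio can be completed.
 */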

enum drbd_req_event {
	CREATED,
	TO_BE_SENT,
	TO_BE_SUBMITTED,

	/* XXX yes, now I am inconsistent...
	 * these are not "events" but "actions"
	 * oh, well... */
	QUEUE_FOR_NET_WRITE,
	QUEUE_FOR_NET_READ,
	QUEUE_FOR_SEND_OOS,

	/* An empty flush is queued as P_BARRIER,
	 * which will cause it to complete "successfully",
	 * even if the local disk flush failed.
	 *
	 * Just like "real" requests, empty flushes (blkdev_issue_flush()) will
	 * only see an error if neither local nor remote data is reachable. */
	QUEUE_AS_DRBD_BARRIER,

	SEND_CANCELED,
	SEND_FAILED,
	HANDED_OVER_TO_NETWORK,
	OOS_HANDED_TO_NETWORK,
	CONNECTION_LOST_WHILE_PENDING,
	READ_RETRY_REMOTE_CANCELED,
	RECV_ACKED_BY_PEER,
	WRITE_ACKED_BY_PEER,
	WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */
	CONFLICT_RESOLVED,
	POSTPONE_WRITE,
	NEG_ACKED,
	BARRIER_ACKED, /* in protocol A and B */
	DATA_RECEIVED, /* (remote read) */

	COMPLETED_OK,
	READ_COMPLETED_WITH_ERROR,
	READ_AHEAD_COMPLETED_WITH_ERROR,
	WRITE_COMPLETED_WITH_ERROR,
	DISCARD_COMPLETED_NOTSUPP,
	DISCARD_COMPLETED_WITH_ERROR,

	ABORT_DISK_IO,
	RESEND,
	FAIL_FROZEN_DISK_IO,
	RESTART_FROZEN_DISK_IO,
	NOTHING,
};
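
/*
 * Illustrative example only (simplified, not exhaustive): a write on a
 * protocol C connection with a healthy local disk typically sees a sequence
 * of events roughly like
 *
 *	TO_BE_SENT, TO_BE_SUBMITTED	while preparing the request,
 *	QUEUE_FOR_NET_WRITE		handed to the sender work queue,
 *	HANDED_OVER_TO_NETWORK		payload pushed onto the socket,
 *	COMPLETED_OK			local disk write finished,
 *	WRITE_ACKED_BY_PEER		the peer's write ack (P_WRITE_ACK) arrived,
 *	BARRIER_ACKED			epoch closed, request may be freed.
 *
 * Error paths (SEND_FAILED, NEG_ACKED, WRITE_COMPLETED_WITH_ERROR, ...) may
 * replace any of the later steps.
 */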

/* encoding of request states for now.  we don't actually need that many bits.
 * we don't need to do atomic bit operations either, since most of the time we
 * need to look at the connection state and/or manipulate some lists at the
 * same time, so we should hold the request lock anyway.
 */
enum drbd_req_state_bits {
	/* 3210
	 * 0000: no local possible
	 * 0001: to be submitted
	 *    UNUSED, we could map: 0011: submitted, completion still pending
	 * 0110: completed ok
	 * 0010: completed with error
	 * 1001: Aborted (before completion)
	 * 1x10: Aborted and completed -> free
	 */
	__RQ_LOCAL_PENDING,
	__RQ_LOCAL_COMPLETED,
	__RQ_LOCAL_OK,
	__RQ_LOCAL_ABORTED,

	/* 87654
	 * 00000: no network possible
	 * 00001: to be sent
	 * 00011: to be sent, on worker queue
	 * 00101: sent, expecting recv_ack (B) or write_ack (C)
	 * 11101: sent,
	 *	  recv_ack (B) or implicit "ack" (A),
	 *	  still waiting for the barrier ack.
	 *	  master_bio may already be completed and invalidated.
	 * 11100: write acked (C),
	 *	  data received (for remote read, any protocol)
	 *	  or finally the barrier ack has arrived (B,A)...
	 *	  request can be freed
	 * 01100: neg-acked (write, protocol C)
	 *	  or neg-d-acked (read, any protocol)
	 *	  or killed from the transfer log
	 *	  during cleanup after connection loss
	 *	  request can be freed
	 * 01000: canceled or send failed...
	 *	  request can be freed
	 */

	/* if "SENT" is not set yet, this can still fail or be canceled.
	 * if "SENT" is set already, we still wait for an Ack packet.
	 * when cleared, the master_bio may be completed.
	 * in (B,A) the request object may still linger on the transfer log
	 * until the corresponding barrier ack comes in */
	__RQ_NET_PENDING,

	/* If it is QUEUED, and it is a WRITE, it is also registered in the
	 * transfer log.  Currently we need this flag to avoid conflicts between
	 * the worker canceling the request and tl_clear_barrier killing it from
	 * the transfer log.  We should restructure the code so this conflict
	 * no longer occurs. */
	__RQ_NET_QUEUED,

	/* well, actually only "handed over to the network stack".
	 *
	 * TODO can potentially be dropped because of the similar meaning
	 * of RQ_NET_SENT and ~RQ_NET_QUEUED.
	 * however it is not exactly the same. before we drop it
	 * we must ensure that we can tell a request with network part
	 * from a request without, regardless of what happens to it. */
	__RQ_NET_SENT,

	/* when set, the request may be freed (if RQ_NET_QUEUED is clear).
	 * basically this means the corresponding P_BARRIER_ACK was received */
	__RQ_NET_DONE,

	/* whether or not we know (C) or pretend (B,A) that the write
	 * was successfully written on the peer.
	 */
	__RQ_NET_OK,

	/* peer called drbd_set_in_sync() for this write */
	__RQ_NET_SIS,

	/* keep this last, it's for the RQ_NET_MASK */
	__RQ_NET_MAX,

	/* Set when this is a write, clear for a read */
	__RQ_WRITE,
	__RQ_WSAME,
	__RQ_UNMAP,
	__RQ_ZEROES,

	/* Should call drbd_al_complete_io() for this request... */
	__RQ_IN_ACT_LOG,

	/* This was the most recent request during some blk_finish_plug()
	 * or its implicit from-schedule equivalent.
	 * We may use it as a hint to send a P_UNPLUG_REMOTE */
	__RQ_UNPLUG,

	/* The peer has sent a retry ACK */
	__RQ_POSTPONED,

	/* would have been completed,
	 * but was not, because of drbd_suspended() */
	__RQ_COMPLETION_SUSP,

	/* We expect a receive ACK (wire proto B) */
	__RQ_EXP_RECEIVE_ACK,

	/* We expect a write ACK (wire proto C) */
	__RQ_EXP_WRITE_ACK,

	/* waiting for a barrier ack, did an extra kref_get */
	__RQ_EXP_BARR_ACK,
};

#define RQ_LOCAL_PENDING	(1UL << __RQ_LOCAL_PENDING)
#define RQ_LOCAL_COMPLETED	(1UL << __RQ_LOCAL_COMPLETED)
#define RQ_LOCAL_OK		(1UL << __RQ_LOCAL_OK)
#define RQ_LOCAL_ABORTED	(1UL << __RQ_LOCAL_ABORTED)

#define RQ_LOCAL_MASK		((RQ_LOCAL_ABORTED << 1)-1)

#define RQ_NET_PENDING		(1UL << __RQ_NET_PENDING)
#define RQ_NET_QUEUED		(1UL << __RQ_NET_QUEUED)
#define RQ_NET_SENT		(1UL << __RQ_NET_SENT)
#define RQ_NET_DONE		(1UL << __RQ_NET_DONE)
#define RQ_NET_OK		(1UL << __RQ_NET_OK)
#define RQ_NET_SIS		(1UL << __RQ_NET_SIS)

#define RQ_NET_MASK		(((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)

#define RQ_WRITE		(1UL << __RQ_WRITE)
#define RQ_WSAME		(1UL << __RQ_WSAME)
#define RQ_UNMAP		(1UL << __RQ_UNMAP)
#define RQ_ZEROES		(1UL << __RQ_ZEROES)
#define RQ_IN_ACT_LOG		(1UL << __RQ_IN_ACT_LOG)
#define RQ_UNPLUG		(1UL << __RQ_UNPLUG)
#define RQ_POSTPONED		(1UL << __RQ_POSTPONED)
#define RQ_COMPLETION_SUSP	(1UL << __RQ_COMPLETION_SUSP)
#define RQ_EXP_RECEIVE_ACK	(1UL << __RQ_EXP_RECEIVE_ACK)
#define RQ_EXP_WRITE_ACK	(1UL << __RQ_EXP_WRITE_ACK)
#define RQ_EXP_BARR_ACK		(1UL << __RQ_EXP_BARR_ACK)
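
/*
 * Illustrative sketch only, not part of the upstream DRBD API: show how the
 * masks above split the combined request state into its local-disk and
 * network parts (this assumes the rq_state member of struct drbd_request
 * from drbd_int.h).  For reference, the "00101: sent, expecting write_ack (C)"
 * row of the table above corresponds to a net part of
 * RQ_NET_PENDING | RQ_NET_SENT.
 */
static inline bool drbd_req_example_still_pending(const struct drbd_request *req)
{
	unsigned long local = req->rq_state & RQ_LOCAL_MASK;
	unsigned long net = req->rq_state & RQ_NET_MASK;

	/* still waiting for local completion, or still on the sender queue /
	 * waiting for the peer's ack */
	return (local & RQ_LOCAL_PENDING) ||
	       (net & (RQ_NET_PENDING | RQ_NET_QUEUED));
}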

/* To wake up a frozen transfer log, __req_mod() has to return whether the
 * request should be counted in the epoch object. */
#define MR_WRITE	1
#define MR_READ		2

/* Short lived temporary struct on the stack.
 * We could squirrel the error to be returned into
 * bio->bi_iter.bi_size, or similar. But that would be too ugly. */
struct bio_and_error {
	struct bio *bio;
	int error;
};

extern void start_new_tl_epoch(struct drbd_connection *connection);
extern void drbd_req_destroy(struct kref *kref);
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
		struct drbd_peer_device *peer_device,
		struct bio_and_error *m);
extern void complete_master_bio(struct drbd_device *device,
		struct bio_and_error *m);
extern void request_timer_fn(struct timer_list *t);
extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
extern void tl_abort_disk_io(struct drbd_device *device);

/* this is in drbd_main.c */
extern void drbd_restart_request(struct drbd_request *req);

/* use this if you don't want to deal with calling complete_master_bio()
 * outside the spinlock, e.g. when walking some list on cleanup. */
static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what,
			   struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = req->device;
	struct bio_and_error m;
	int rv;

	/* __req_mod possibly frees req, do not touch req after that! */
	rv = __req_mod(req, what, peer_device, &m);
	if (m.bio)
		complete_master_bio(device, &m);

	return rv;
}
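
/*
 * Illustrative usage sketch (simplified, not copied from drbd_req.c): cleanup
 * code that already holds resource->req_lock and walks the transfer log can
 * use _req_mod() directly, roughly like
 *
 *	list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests)
 *		_req_mod(req, CONNECTION_LOST_WHILE_PENDING, peer_device);
 *
 * (list head and member names as in drbd_int.h; the real iteration in
 * _tl_restart() has more cases to consider.)
 */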

/* completion of master bio is outside of our spinlock.
 * We still may or may not be inside some irqs disabled section
 * of the lower level driver completion callback, so we need to
 * spin_lock_irqsave here. */
static inline int req_mod(struct drbd_request *req,
		enum drbd_req_event what,
		struct drbd_peer_device *peer_device)
{
	unsigned long flags;
	struct drbd_device *device = req->device;
	struct bio_and_error m;
	int rv;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	rv = __req_mod(req, what, peer_device, &m);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	if (m.bio)
		complete_master_bio(device, &m);

	return rv;
}
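
/*
 * Illustrative usage sketch: a lower-level completion callback, which may run
 * with irqs disabled and does not hold req_lock, reports the local result via
 * req_mod(), roughly like
 *
 *	what = bio->bi_status ? WRITE_COMPLETED_WITH_ERROR : COMPLETED_OK;
 *	req_mod(req, what, NULL);
 *
 * (simplified; see drbd_request_endio() in drbd_worker.c for the real logic,
 * including read and discard errors.)
 */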

extern bool drbd_should_do_remote(union drbd_dev_state);

#endif