// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/module.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
	iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
}

static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
					bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;

	netfs_subreq_terminated(subreq, transferred_or_error, was_async);
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_from_cache(struct netfs_io_request *rreq,
				  struct netfs_io_subrequest *subreq,
				  enum netfs_read_from_hole read_hole)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	netfs_stat(&netfs_n_rh_read);
	cres->ops->read(cres, subreq->start, &subreq->io_iter, read_hole,
			netfs_cache_read_terminated, subreq);
}

/*
 * Fill a subrequest region with zeroes.
 */
static void netfs_fill_with_zeroes(struct netfs_io_request *rreq,
				   struct netfs_io_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_zero);
	__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
	netfs_subreq_terminated(subreq, 0, false);
}

/*
 * Ask the netfs to issue a read request to the server for us.
 *
 * The netfs is expected to read from subreq->start + subreq->transferred to
 * subreq->start + subreq->len - 1. It may not backtrack and write data into
 * the buffer prior to the transferred point as it might clobber dirty data
 * obtained from the cache.
 *
 * Alternatively, the netfs is allowed to indicate one of two things:
 *
 * - NETFS_SREQ_SHORT_READ: A short read - it will get called again to try and
 *   make progress.
 *
 * - NETFS_SREQ_CLEAR_TAIL: A short read - the rest of the buffer will be
 *   cleared.
 */
static void netfs_read_from_server(struct netfs_io_request *rreq,
				   struct netfs_io_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_download);

	if (rreq->origin != NETFS_DIO_READ &&
	    iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
		pr_warn("R=%08x[%u] ITER PRE-MISMATCH %zx != %zx-%zx %lx\n",
			rreq->debug_id, subreq->debug_index,
			iov_iter_count(&subreq->io_iter), subreq->len,
			subreq->transferred, subreq->flags);
	rreq->netfs_ops->issue_read(subreq);
}
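
/*
 * For illustration only: a minimal sketch of the shape a filesystem's
 * ->issue_read() handler might take, assuming a hypothetical synchronous
 * transport helper my_fs_send_read() that fills subreq->io_iter and returns
 * the number of bytes read or a negative error.  An asynchronous handler
 * would instead call netfs_subreq_terminated() from its completion path.
 *
 *	static void my_fs_issue_read(struct netfs_io_subrequest *subreq)
 *	{
 *		ssize_t ret;
 *
 *		ret = my_fs_send_read(subreq->rreq->inode,
 *				      subreq->start + subreq->transferred,
 *				      subreq->len - subreq->transferred,
 *				      &subreq->io_iter);
 *		netfs_subreq_terminated(subreq, ret, false);
 *	}
 */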

/*
 * Release those waiting.
 */
static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_clear_subrequests(rreq, was_async);
	netfs_put_request(rreq, was_async, netfs_rreq_trace_put_complete);
}

/*
 * Deal with the completion of writing the data to the cache. We have to clear
 * the PG_fscache bits on the folios involved and release the caller's ref.
 *
 * May be called in softirq mode and we inherit a ref from the caller.
 */
static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
					  bool was_async)
{
	struct netfs_io_subrequest *subreq;
	struct folio *folio;
	pgoff_t unlocked = 0;
	bool have_unlocked = false;

	rcu_read_lock();

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);

		xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
			if (xas_retry(&xas, folio))
				continue;

			/* We might have multiple writes from the same huge
			 * folio, but we mustn't unlock a folio more than once.
			 */
			if (have_unlocked && folio->index <= unlocked)
				continue;
			unlocked = folio_next_index(folio) - 1;
			trace_netfs_folio(folio, netfs_folio_trace_end_copy);
			folio_end_fscache(folio);
			have_unlocked = true;
		}
	}

	rcu_read_unlock();
	netfs_rreq_completed(rreq, was_async);
}

static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;
	struct netfs_io_request *rreq = subreq->rreq;

	if (IS_ERR_VALUE(transferred_or_error)) {
		netfs_stat(&netfs_n_rh_write_failed);
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_copy_to_cache);
	} else {
		netfs_stat(&netfs_n_rh_write_done);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);

	/* If we decrement nr_copy_ops to 0, the ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_copy_ops))
		netfs_rreq_unmark_after_write(rreq, was_async);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
}

/*
 * Perform any outstanding writes to the cache. We inherit a ref from the
 * caller.
 */
static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct netfs_io_subrequest *subreq, *next, *p;
	struct iov_iter iter;
	int ret;

	trace_netfs_rreq(rreq, netfs_rreq_trace_copy);

	/* We don't want terminating writes trying to wake us up whilst we're
	 * still going through the list.
	 */
	atomic_inc(&rreq->nr_copy_ops);

	list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
		if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
			list_del_init(&subreq->rreq_link);
			netfs_put_subrequest(subreq, false,
					     netfs_sreq_trace_put_no_copy);
		}
	}

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		/* Amalgamate adjacent writes */
		while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
			next = list_next_entry(subreq, rreq_link);
			if (next->start != subreq->start + subreq->len)
				break;
			subreq->len += next->len;
			list_del_init(&next->rreq_link);
			netfs_put_subrequest(next, false,
					     netfs_sreq_trace_put_merged);
		}

		ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
					       subreq->len, rreq->i_size, true);
		if (ret < 0) {
			trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
			trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
			continue;
		}

		iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages,
				subreq->start, subreq->len);

		atomic_inc(&rreq->nr_copy_ops);
		netfs_stat(&netfs_n_rh_write);
		netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache);
		trace_netfs_sreq(subreq, netfs_sreq_trace_write);
		cres->ops->write(cres, subreq->start, &iter,
				 netfs_rreq_copy_terminated, subreq);
	}

	/* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_copy_ops))
		netfs_rreq_unmark_after_write(rreq, false);
}

static void netfs_rreq_write_to_cache_work(struct work_struct *work)
{
	struct netfs_io_request *rreq =
		container_of(work, struct netfs_io_request, work);

	netfs_rreq_do_write_to_cache(rreq);
}

static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq)
{
	rreq->work.func = netfs_rreq_write_to_cache_work;
	if (!queue_work(system_unbound_wq, &rreq->work))
		BUG();
}

/*
 * Handle a short read.
 */
static void netfs_rreq_short_read(struct netfs_io_request *rreq,
				  struct netfs_io_subrequest *subreq)
{
	__clear_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
	__set_bit(NETFS_SREQ_SEEK_DATA_READ, &subreq->flags);

	netfs_stat(&netfs_n_rh_short_read);
	trace_netfs_sreq(subreq, netfs_sreq_trace_resubmit_short);

	netfs_get_subrequest(subreq, netfs_sreq_trace_get_short_read);
	atomic_inc(&rreq->nr_outstanding);
	if (subreq->source == NETFS_READ_FROM_CACHE)
		netfs_read_from_cache(rreq, subreq, NETFS_READ_HOLE_CLEAR);
	else
		netfs_read_from_server(rreq, subreq);
}

/*
 * Reset the subrequest iterator prior to resubmission.
 */
static void netfs_reset_subreq_iter(struct netfs_io_request *rreq,
				    struct netfs_io_subrequest *subreq)
{
	size_t remaining = subreq->len - subreq->transferred;
	size_t count = iov_iter_count(&subreq->io_iter);

	if (count == remaining)
		return;

	_debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
	       rreq->debug_id, subreq->debug_index,
	       iov_iter_count(&subreq->io_iter), subreq->transferred,
	       subreq->len, rreq->i_size,
	       subreq->io_iter.iter_type);

	if (count < remaining)
		iov_iter_revert(&subreq->io_iter, remaining - count);
	else
		iov_iter_advance(&subreq->io_iter, count - remaining);
}
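
/*
 * Worked example: if subreq->len is 0x1000 and 0x400 bytes have already been
 * transferred, 0xc00 bytes remain.  Should the iterator still claim 0x1000
 * bytes, netfs_reset_subreq_iter() advances it by 0x400; should it claim only
 * 0x800, the iterator is reverted by 0x400, so that in either case it once
 * again covers exactly the untransferred tail of the subrequest.
 */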

/*
 * Resubmit any short or failed operations. Returns true if we got the rreq
 * ref back.
 */
static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;

	WARN_ON(in_interrupt());

	trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);

	/* We don't want terminating submissions trying to wake us up whilst
	 * we're still going through the list.
	 */
	atomic_inc(&rreq->nr_outstanding);

	__clear_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->error) {
			if (subreq->source != NETFS_READ_FROM_CACHE)
				break;
			subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
			subreq->error = 0;
			netfs_stat(&netfs_n_rh_download_instead);
			trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
			netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
			atomic_inc(&rreq->nr_outstanding);
			netfs_reset_subreq_iter(rreq, subreq);
			netfs_read_from_server(rreq, subreq);
		} else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) {
			netfs_rreq_short_read(rreq, subreq);
		}
	}

	/* If we decrement nr_outstanding to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_outstanding))
		return true;

	wake_up_var(&rreq->nr_outstanding);
	return false;
}

/*
 * Check to see if the data read is still valid.
 */
static void netfs_rreq_is_still_valid(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;

	if (!rreq->netfs_ops->is_still_valid ||
	    rreq->netfs_ops->is_still_valid(rreq))
		return;

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			subreq->error = -ESTALE;
			__set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
		}
	}
}

/*
 * Determine how much we can admit to having read from a DIO read.
 */
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	unsigned int i;
	size_t transferred = 0;

	for (i = 0; i < rreq->direct_bv_count; i++)
		flush_dcache_page(rreq->direct_bv[i].bv_page);

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->error || subreq->transferred == 0)
			break;
		transferred += subreq->transferred;
		if (subreq->transferred < subreq->len)
			break;
	}

	for (i = 0; i < rreq->direct_bv_count; i++)
		flush_dcache_page(rreq->direct_bv[i].bv_page);

	rreq->transferred = transferred;
	task_io_account_read(transferred);

	if (rreq->iocb) {
		rreq->iocb->ki_pos += transferred;
		if (rreq->iocb->ki_complete)
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : transferred);
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
	inode_dio_end(rreq->inode);
}

/*
 * Assess the state of a read request and decide what to do next.
 *
 * Note that we could be in an ordinary kernel thread, on a workqueue or in
 * softirq context at this point. We inherit a ref from the caller.
 */
static void netfs_rreq_assess(struct netfs_io_request *rreq, bool was_async)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_assess);

again:
	netfs_rreq_is_still_valid(rreq);

	if (!test_bit(NETFS_RREQ_FAILED, &rreq->flags) &&
	    test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags)) {
		if (netfs_rreq_perform_resubmissions(rreq))
			goto again;
		return;
	}

	if (rreq->origin != NETFS_DIO_READ)
		netfs_rreq_unlock_folios(rreq);
	else
		netfs_rreq_assess_dio(rreq);

	trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
	wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags))
		return netfs_rreq_write_to_cache(rreq);

	netfs_rreq_completed(rreq, was_async);
}

static void netfs_rreq_work(struct work_struct *work)
{
	struct netfs_io_request *rreq =
		container_of(work, struct netfs_io_request, work);

	netfs_rreq_assess(rreq, false);
}

/*
 * Handle the completion of all outstanding I/O operations on a read request.
 * We inherit a ref from the caller.
 */
static void netfs_rreq_terminated(struct netfs_io_request *rreq,
				  bool was_async)
{
	if (test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags) &&
	    was_async) {
		if (!queue_work(system_unbound_wq, &rreq->work))
			BUG();
	} else {
		netfs_rreq_assess(rreq, was_async);
	}
}

/**
 * netfs_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 * @transferred_or_error: The amount of data transferred or an error code.
 * @was_async: The termination was asynchronous
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates in @transferred_or_error the outcome of the operation,
 * supplying a positive value to indicate the number of bytes transferred, 0 to
 * indicate a failure to transfer anything that should be retried or a negative
 * error code. The helper will look after reissuing I/O operations as
 * appropriate and writing downloaded data to the cache.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
			     ssize_t transferred_or_error,
			     bool was_async)
{
	struct netfs_io_request *rreq = subreq->rreq;
	int u;

	_enter("R=%x[%x]{%llx,%lx},%zd",
	       rreq->debug_id, subreq->debug_index,
	       subreq->start, subreq->flags, transferred_or_error);

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_read);
		goto failed;
	}

	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq overread: R%x[%x] %zd > %zu - %zu",
		 rreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;
	if (subreq->transferred < subreq->len)
		goto incomplete;

complete:
	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
		set_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);

out:
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
	u = atomic_dec_return(&rreq->nr_outstanding);
	if (u == 0)
		netfs_rreq_terminated(rreq, was_async);
	else if (u == 1)
		wake_up_var(&rreq->nr_outstanding);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
	return;

incomplete:
	if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
		netfs_clear_unread(subreq);
		subreq->transferred = subreq->len;
		goto complete;
	}

	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}

	__set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	goto out;

failed:
	if (subreq->source == NETFS_READ_FROM_CACHE) {
		netfs_stat(&netfs_n_rh_read_failed);
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	} else {
		netfs_stat(&netfs_n_rh_download_failed);
		set_bit(NETFS_RREQ_FAILED, &rreq->flags);
		rreq->error = subreq->error;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_subreq_terminated);
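
/*
 * For example, a filesystem whose read RPC completes in softirq context might
 * report the outcome from its completion callback along these lines (a
 * sketch; my_fs_read_done() and its reply structure are hypothetical):
 *
 *	static void my_fs_read_done(struct my_fs_reply *reply)
 *	{
 *		struct netfs_io_subrequest *subreq = reply->subreq;
 *
 *		netfs_subreq_terminated(subreq,
 *					reply->error < 0 ? reply->error :
 *					reply->bytes_received,
 *					true);
 *	}
 */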

static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_io_request *rreq = subreq->rreq;
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops)
		return cres->ops->prepare_read(subreq, i_size);
	if (subreq->start >= rreq->i_size)
		return NETFS_FILL_WITH_ZEROES;
	return NETFS_DOWNLOAD_FROM_SERVER;
}

/*
 * Work out what sort of subrequest the next one will be.
 */
static enum netfs_io_source
netfs_rreq_prepare_read(struct netfs_io_request *rreq,
			struct netfs_io_subrequest *subreq,
			struct iov_iter *io_iter)
{
	enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
	struct netfs_inode *ictx = netfs_inode(rreq->inode);
	size_t lsize;

	_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);

	if (rreq->origin != NETFS_DIO_READ) {
		source = netfs_cache_prepare_read(subreq, rreq->i_size);
		if (source == NETFS_INVALID_READ)
			goto out;
	}

	if (source == NETFS_DOWNLOAD_FROM_SERVER) {
		/* Call out to the netfs to let it shrink the request to fit
		 * its own I/O sizes and boundaries. If it shrinks it here, it
		 * will be called again to make simultaneous calls; if it wants
		 * to make serial calls, it can indicate a short read and then
		 * we will call it again.
		 */
		if (rreq->origin != NETFS_DIO_READ) {
			if (subreq->start >= ictx->zero_point) {
				source = NETFS_FILL_WITH_ZEROES;
				goto set;
			}
			if (subreq->len > ictx->zero_point - subreq->start)
				subreq->len = ictx->zero_point - subreq->start;
		}
		if (subreq->len > rreq->i_size - subreq->start)
			subreq->len = rreq->i_size - subreq->start;
		if (rreq->rsize && subreq->len > rreq->rsize)
			subreq->len = rreq->rsize;

		if (rreq->netfs_ops->clamp_length &&
		    !rreq->netfs_ops->clamp_length(subreq)) {
			source = NETFS_INVALID_READ;
			goto out;
		}

		if (subreq->max_nr_segs) {
			lsize = netfs_limit_iter(io_iter, 0, subreq->len,
						 subreq->max_nr_segs);
			if (subreq->len > lsize) {
				subreq->len = lsize;
				trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
			}
		}
	}

set:
	if (subreq->len > rreq->len)
		pr_warn("R=%08x[%u] SREQ>RREQ %zx > %zx\n",
			rreq->debug_id, subreq->debug_index,
			subreq->len, rreq->len);

	if (WARN_ON(subreq->len == 0)) {
		source = NETFS_INVALID_READ;
		goto out;
	}

	subreq->source = source;
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);

	subreq->io_iter = *io_iter;
	iov_iter_truncate(&subreq->io_iter, subreq->len);
	iov_iter_advance(io_iter, subreq->len);
out:
	subreq->source = source;
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	return source;
}
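
/*
 * The ->clamp_length() hook above is how the filesystem imposes its own I/O
 * size and alignment limits; returning false marks the subrequest invalid.
 * A minimal sketch, assuming a hypothetical per-filesystem maximum RPC size
 * MY_FS_MAX_RPC_SIZE, might simply trim the subrequest and accept it:
 *
 *	static bool my_fs_clamp_length(struct netfs_io_subrequest *subreq)
 *	{
 *		subreq->len = min_t(size_t, subreq->len, MY_FS_MAX_RPC_SIZE);
 *		return true;
 *	}
 */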

/*
 * Slice off a piece of a read request and submit an I/O request for it.
 */
static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
				    struct iov_iter *io_iter,
				    unsigned int *_debug_index)
{
	struct netfs_io_subrequest *subreq;
	enum netfs_io_source source;

	subreq = netfs_alloc_subrequest(rreq);
	if (!subreq)
		return false;

	subreq->debug_index = (*_debug_index)++;
	subreq->start = rreq->start + rreq->submitted;
	subreq->len = io_iter->count;

	_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
	list_add_tail(&subreq->rreq_link, &rreq->subrequests);

	/* Call out to the cache to find out what it can do with the remaining
	 * subset. It tells us in subreq->flags what it decided should be done
	 * and adjusts subreq->len down if the subset crosses a cache boundary.
	 *
	 * Then when we hand the subset, it can choose to take a subset of that
	 * (the starts must coincide), in which case, we go around the loop
	 * again and ask it to download the next piece.
	 */
	source = netfs_rreq_prepare_read(rreq, subreq, io_iter);
	if (source == NETFS_INVALID_READ)
		goto subreq_failed;

	atomic_inc(&rreq->nr_outstanding);

	rreq->submitted += subreq->len;

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
	switch (source) {
	case NETFS_FILL_WITH_ZEROES:
		netfs_fill_with_zeroes(rreq, subreq);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_read_from_server(rreq, subreq);
		break;
	case NETFS_READ_FROM_CACHE:
		netfs_read_from_cache(rreq, subreq, NETFS_READ_HOLE_IGNORE);
		break;
	default:
		BUG();
	}

	return true;

subreq_failed:
	rreq->error = subreq->error;
	netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_failed);
	return false;
}

/*
 * Begin the process of reading in a chunk of data, where that data may be
 * stitched together from multiple sources, including multiple servers and the
 * local cache.
 */
int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
{
	struct iov_iter io_iter;
	unsigned int debug_index = 0;
	int ret;

	_enter("R=%x %llx-%llx",
	       rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);

	if (rreq->len == 0) {
		pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
		return -EIO;
	}

	if (rreq->origin == NETFS_DIO_READ)
		inode_dio_begin(rreq->inode);

	// TODO: Use bounce buffer if requested
	rreq->io_iter = rreq->iter;

	INIT_WORK(&rreq->work, netfs_rreq_work);

	/* Chop the read into slices according to what the cache and the netfs
	 * want and submit each one.
	 */
	netfs_get_request(rreq, netfs_rreq_trace_get_for_outstanding);
	atomic_set(&rreq->nr_outstanding, 1);
	io_iter = rreq->io_iter;
	do {
		_debug("submit %llx + %zx >= %llx",
		       rreq->start, rreq->submitted, rreq->i_size);
		if (rreq->origin == NETFS_DIO_READ &&
		    rreq->start + rreq->submitted >= rreq->i_size)
			break;
		if (!netfs_rreq_submit_slice(rreq, &io_iter, &debug_index))
			break;
		if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
		    test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
			break;

	} while (rreq->submitted < rreq->len);

	if (!rreq->submitted) {
		netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
		if (rreq->origin == NETFS_DIO_READ)
			inode_dio_end(rreq->inode);
		ret = 0;
		goto out;
	}

	if (sync) {
		/* Keep nr_outstanding incremented so that the ref always
		 * belongs to us, and the service code isn't punted off to a
		 * random thread pool to process. Note that this might start
		 * further work, such as writing to the cache.
		 */
		wait_var_event(&rreq->nr_outstanding,
			       atomic_read(&rreq->nr_outstanding) == 1);
		if (atomic_dec_and_test(&rreq->nr_outstanding))
			netfs_rreq_assess(rreq, false);

		trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
		wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS,
			    TASK_UNINTERRUPTIBLE);

		ret = rreq->error;
		if (ret == 0 && rreq->submitted < rreq->len &&
		    rreq->origin != NETFS_DIO_READ) {
			trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
			ret = -EIO;
		}
	} else {
		/* If we decrement nr_outstanding to 0, the ref belongs to us. */
		if (atomic_dec_and_test(&rreq->nr_outstanding))
			netfs_rreq_assess(rreq, false);
		ret = -EIOCBQUEUED;
	}

out:
	return ret;
}