// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem write subrequest result collection, assessment
 * and retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include "internal.h"

/* Notes made in the collector */
#define HIT_PENDING	0x01	/* A front op was still pending */
#define NEED_REASSESS	0x02	/* Need to loop round and reassess */
#define MADE_PROGRESS	0x04	/* Made progress cleaning up a stream or the folio set */
#define NEED_UNLOCK	0x08	/* The pagecache needs unlocking */
#define NEED_RETRY	0x10	/* A front op requests retrying */
#define SAW_FAILURE	0x20	/* A stream hit a permanent failure */
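
/* The notes above are accumulated by netfs_collect_write_results() as it
 * walks the streams and are used to decide whether folios can be unlocked,
 * subrequests need retrying or the streams need reassessing.
 */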

static void netfs_dump_request(const struct netfs_io_request *rreq)
{
	pr_err("Request R=%08x r=%d fl=%lx or=%x e=%ld\n",
	       rreq->debug_id, refcount_read(&rreq->ref), rreq->flags,
	       rreq->origin, rreq->error);
	pr_err(" st=%llx tsl=%zx/%llx/%llx\n",
	       rreq->start, rreq->transferred, rreq->submitted, rreq->len);
	pr_err(" cci=%llx/%llx/%llx\n",
	       rreq->cleaned_to, rreq->collected_to, atomic64_read(&rreq->issued_to));
	pr_err(" iw=%pSR\n", rreq->netfs_ops->issue_write);
	for (int i = 0; i < NR_IO_STREAMS; i++) {
		const struct netfs_io_subrequest *sreq;
		const struct netfs_io_stream *s = &rreq->io_streams[i];

		pr_err(" str[%x] s=%x e=%d acnf=%u,%u,%u,%u\n",
		       s->stream_nr, s->source, s->error,
		       s->avail, s->active, s->need_retry, s->failed);
		pr_err(" str[%x] ct=%llx t=%zx\n",
		       s->stream_nr, s->collected_to, s->transferred);
		list_for_each_entry(sreq, &s->subrequests, rreq_link) {
			pr_err(" sreq[%x:%x] sc=%u s=%llx t=%zx/%zx r=%d f=%lx\n",
			       sreq->stream_nr, sreq->debug_index, sreq->source,
			       sreq->start, sreq->transferred, sreq->len,
			       refcount_read(&sreq->ref), sreq->flags);
		}
	}
}

/*
 * Successful completion of write of a folio to the server and/or cache. Note
 * that we are not allowed to lock the folio here on pain of deadlocking with
 * truncate.
 */
int netfs_folio_written_back(struct folio *folio)
{
	enum netfs_folio_trace why = netfs_folio_trace_clear;
	struct netfs_inode *ictx = netfs_inode(folio->mapping->host);
	struct netfs_folio *finfo;
	struct netfs_group *group = NULL;
	int gcount = 0;

	if ((finfo = netfs_folio_info(folio))) {
		/* Streaming writes cannot be redirtied whilst under writeback,
		 * so discard the streaming record.
		 */
		unsigned long long fend;

		fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len;
		if (fend > ictx->zero_point)
			ictx->zero_point = fend;

		folio_detach_private(folio);
		group = finfo->netfs_group;
		gcount++;
		kfree(finfo);
		why = netfs_folio_trace_clear_s;
		goto end_wb;
	}

	if ((group = netfs_folio_group(folio))) {
		if (group == NETFS_FOLIO_COPY_TO_CACHE) {
			why = netfs_folio_trace_clear_cc;
			folio_detach_private(folio);
			goto end_wb;
		}

		/* Need to detach the group pointer if the page didn't get
		 * redirtied. If it has been redirtied, then it must be within
		 * the same group.
		 */
		why = netfs_folio_trace_redirtied;
		if (!folio_test_dirty(folio)) {
			folio_detach_private(folio);
			gcount++;
			why = netfs_folio_trace_clear_g;
		}
	}

end_wb:
	trace_netfs_folio(folio, why);
	folio_end_writeback(folio);
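	/* Return the number of refs on the folio's group for the caller to
	 * drop (typically accumulated into wreq->nr_group_rel).
	 */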
	return gcount;
}

/*
 * Unlock any folios we've finished with.
 */
static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
					  unsigned int *notes)
{
	struct folio_queue *folioq = wreq->buffer.tail;
	unsigned long long collected_to = wreq->collected_to;
	unsigned int slot = wreq->buffer.first_tail_slot;

	if (WARN_ON_ONCE(!folioq)) {
		pr_err("[!] Writeback unlock found empty rolling buffer!\n");
		netfs_dump_request(wreq);
		return;
	}

	if (wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) {
		if (netfs_pgpriv2_unlock_copied_folios(wreq))
			*notes |= MADE_PROGRESS;
		return;
	}

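	/* If the slot we would start at is off the end of the tail folioq,
	 * that folioq has been entirely consumed and can be dropped.
	 */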
	if (slot >= folioq_nr_slots(folioq)) {
		folioq = rolling_buffer_delete_spent(&wreq->buffer);
		if (!folioq)
			return;
		slot = 0;
	}

	for (;;) {
		struct folio *folio;
		struct netfs_folio *finfo;
		unsigned long long fpos, fend;
		size_t fsize, flen;

		folio = folioq_folio(folioq, slot);
		if (WARN_ONCE(!folio_test_writeback(folio),
			      "R=%08x: folio %lx is not under writeback\n",
			      wreq->debug_id, folio->index))
			trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);

		fpos = folio_pos(folio);
		fsize = folio_size(folio);
		finfo = netfs_folio_info(folio);
		flen = finfo ? finfo->dirty_offset + finfo->dirty_len : fsize;

		fend = min_t(unsigned long long, fpos + flen, wreq->i_size);

		trace_netfs_collect_folio(wreq, folio, fend, collected_to);

		/* Unlock any folio we've transferred all of. */
		if (collected_to < fend)
			break;

		wreq->nr_group_rel += netfs_folio_written_back(folio);
		wreq->cleaned_to = fpos + fsize;
		*notes |= MADE_PROGRESS;

		/* Clean up the head folioq. If we clear an entire folioq, then
		 * we can get rid of it provided it's not also the tail folioq
		 * being filled by the issuer.
		 */
		folioq_clear(folioq, slot);
		slot++;
		if (slot >= folioq_nr_slots(folioq)) {
			folioq = rolling_buffer_delete_spent(&wreq->buffer);
			if (!folioq)
				goto done;
			slot = 0;
		}

		if (fpos + fsize >= collected_to)
			break;
	}

	wreq->buffer.tail = folioq;
done:
	wreq->buffer.first_tail_slot = slot;
}

/*
 * Collect and assess the results of various write subrequests. We may need to
 * retry some of the subrequests - or even do an RMW cycle for content crypto.
 *
 * Note that we have a number of parallel, overlapping lists of subrequests,
 * one to the server and one to the local cache for example, which may not be
 * the same size or starting position and may not even correspond in boundary
 * alignment.
 */
static void netfs_collect_write_results(struct netfs_io_request *wreq)
{
	struct netfs_io_subrequest *front, *remove;
	struct netfs_io_stream *stream;
	unsigned long long collected_to, issued_to;
	unsigned int notes;
	int s;

	_enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
	trace_netfs_collect(wreq);
	trace_netfs_rreq(wreq, netfs_rreq_trace_collect);

reassess_streams:
	issued_to = atomic64_read(&wreq->issued_to);
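	/* Order the read of issued_to before the reads of the stream state
	 * made below.
	 */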
	smp_rmb();
	collected_to = ULLONG_MAX;
	if (wreq->origin == NETFS_WRITEBACK ||
	    wreq->origin == NETFS_WRITETHROUGH ||
	    wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE)
		notes = NEED_UNLOCK;
	else
		notes = 0;

	/* Remove completed subrequests from the front of the streams and
	 * advance the completion point on each stream. We stop when we hit
	 * something that's in progress. The issuer thread may be adding stuff
	 * to the tail whilst we're doing this.
	 */
	for (s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		/* Read active flag before list pointers */
		if (!smp_load_acquire(&stream->active))
			continue;

		front = stream->front;
		while (front) {
			trace_netfs_collect_sreq(wreq, front);
			//_debug("sreq [%x] %llx %zx/%zx",
			//       front->debug_index, front->start, front->transferred, front->len);

			if (stream->collected_to < front->start) {
				trace_netfs_collect_gap(wreq, stream, issued_to, 'F');
				stream->collected_to = front->start;
			}

			/* Stall if the front is still undergoing I/O. */
			if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) {
				notes |= HIT_PENDING;
				break;
			}
			smp_rmb(); /* Read counters after I-P flag. */

			if (stream->failed) {
				stream->collected_to = front->start + front->len;
				notes |= MADE_PROGRESS | SAW_FAILURE;
				goto cancel;
			}
			if (front->start + front->transferred > stream->collected_to) {
				stream->collected_to = front->start + front->transferred;
				stream->transferred = stream->collected_to - wreq->start;
				notes |= MADE_PROGRESS;
			}
			if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
				stream->failed = true;
				stream->error = front->error;
				if (stream->source == NETFS_UPLOAD_TO_SERVER)
					mapping_set_error(wreq->mapping, front->error);
				notes |= NEED_REASSESS | SAW_FAILURE;
				break;
			}
			if (front->transferred < front->len) {
				stream->need_retry = true;
				notes |= NEED_RETRY | MADE_PROGRESS;
				break;
			}

		cancel:
			/* Remove if completely consumed. */
			spin_lock(&wreq->lock);

			remove = front;
			list_del_init(&front->rreq_link);
			front = list_first_entry_or_null(&stream->subrequests,
							 struct netfs_io_subrequest, rreq_link);
			stream->front = front;
			spin_unlock(&wreq->lock);
			netfs_put_subrequest(remove,
					     notes & SAW_FAILURE ?
					     netfs_sreq_trace_put_cancel :
					     netfs_sreq_trace_put_done);
		}

		/* If we have an empty stream, we need to jump it forward,
		 * otherwise the collection point will never advance.
		 */
		if (!front && issued_to > stream->collected_to) {
			trace_netfs_collect_gap(wreq, stream, issued_to, 'E');
			stream->collected_to = issued_to;
		}

		if (stream->collected_to < collected_to)
			collected_to = stream->collected_to;
	}

	if (collected_to != ULLONG_MAX && collected_to > wreq->collected_to)
		wreq->collected_to = collected_to;

	for (s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		if (stream->active)
			trace_netfs_collect_stream(wreq, stream);
	}

	trace_netfs_collect_state(wreq, wreq->collected_to, notes);

	/* Unlock any folios that we have now finished with. */
	if (notes & NEED_UNLOCK) {
		if (wreq->cleaned_to < wreq->collected_to)
			netfs_writeback_unlock_folios(wreq, &notes);
	} else {
		wreq->cleaned_to = wreq->collected_to;
	}

	// TODO: Discard encryption buffers

	if (notes & NEED_RETRY)
		goto need_retry;

	if (notes & MADE_PROGRESS) {
		netfs_wake_rreq_flag(wreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
		//cond_resched();
		goto reassess_streams;
	}

	if (notes & NEED_REASSESS) {
		//cond_resched();
		goto reassess_streams;
	}

out:
	netfs_put_group_many(wreq->group, wreq->nr_group_rel);
	wreq->nr_group_rel = 0;
	_leave(" = %x", notes);
	return;

need_retry:
	/* Okay... We're going to have to retry one or both streams. Note
	 * that any partially completed op will have had any wholly transferred
	 * folios removed from it.
	 */
	_debug("retry");
	netfs_retry_writes(wreq);
	goto out;
}

/*
 * Perform the collection of subrequests, folios and encryption buffers.
 */
bool netfs_write_collection(struct netfs_io_request *wreq)
{
	struct netfs_inode *ictx = netfs_inode(wreq->inode);
	size_t transferred;
	int s;

	_enter("R=%x", wreq->debug_id);

	netfs_collect_write_results(wreq);

	/* We're done when the app thread has finished posting subreqs and all
	 * the queues in all the streams are empty.
	 */
	if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags))
		return false;
	smp_rmb(); /* Read ALL_QUEUED before lists. */

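	/* The request can only be regarded as having transferred up to the
	 * lowest point reached by all of the active streams.
	 */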
	transferred = LONG_MAX;
	for (s = 0; s < NR_IO_STREAMS; s++) {
		struct netfs_io_stream *stream = &wreq->io_streams[s];
		if (!stream->active)
			continue;
		if (!list_empty(&stream->subrequests))
			return false;
		if (stream->transferred < transferred)
			transferred = stream->transferred;
	}

	/* Okay, declare that all I/O is complete. */
	wreq->transferred = transferred;
	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);

	if (wreq->io_streams[1].active &&
	    wreq->io_streams[1].failed &&
	    ictx->ops->invalidate_cache) {
		/* Cache write failure doesn't prevent writeback completion
		 * unless we're in disconnected mode.
		 */
		ictx->ops->invalidate_cache(wreq);
	}

	if (wreq->cleanup)
		wreq->cleanup(wreq);

	if (wreq->origin == NETFS_DIO_WRITE &&
	    wreq->mapping->nrpages) {
		/* mmap may have got underfoot and we may now have folios
		 * locally covering the region we just wrote. Attempt to
		 * discard the folios, but leave in place any modified locally.
		 * ->write_iter() is prevented from interfering by the DIO
		 * counter.
		 */
		pgoff_t first = wreq->start >> PAGE_SHIFT;
		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(wreq->mapping, first, last);
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_end(wreq->inode);

	_debug("finished");
	netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
	/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */

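	/* If the write was driven by an iocb, advance the file position and
	 * run its completion handler with the outcome.
	 */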
	if (wreq->iocb) {
		size_t written = min(wreq->transferred, wreq->len);
		wreq->iocb->ki_pos += written;
		if (wreq->iocb->ki_complete)
			wreq->iocb->ki_complete(
				wreq->iocb, wreq->error ? wreq->error : written);
		wreq->iocb = VFS_PTR_POISON;
	}

	netfs_clear_subrequests(wreq);
	return true;
}

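/*
 * Work item that collects the results for a write request whilst it is still
 * marked as being in progress.
 */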
void netfs_write_collection_worker(struct work_struct *work)
{
	struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);

	netfs_see_request(rreq, netfs_rreq_trace_see_work);
	if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
		if (netfs_write_collection(rreq))
			/* Drop the ref from the IN_PROGRESS flag. */
			netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
		else
			netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
	}
}

/**
 * netfs_write_subrequest_terminated - Note the termination of a write operation.
 * @_op: The I/O subrequest that has terminated.
 * @transferred_or_error: The amount of data transferred or an error code.
 *
 * This tells the library that a contributory write I/O operation has
 * terminated, one way or another, and that it should collect the results.
 *
 * The caller indicates in @transferred_or_error the outcome of the operation,
 * supplying a positive value to indicate the number of bytes transferred or a
 * negative error code. The library will look after reissuing I/O operations
 * as appropriate and writing downloaded data to the cache.
 *
 * When this is called, ownership of the subrequest is transferred back to the
 * library, along with a ref.
 *
 * Note that %_op is a void* so that the function can be passed to
 * kiocb::term_func without the need for a casting wrapper.
 */
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error)
{
	struct netfs_io_subrequest *subreq = _op;
	struct netfs_io_request *wreq = subreq->rreq;

	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);

	switch (subreq->source) {
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_done);
		break;
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_done);
		break;
	default:
		BUG();
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
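		/* -EAGAIN merely asks for the subrequest to be retried;
		 * anything else marks it as having failed outright.
		 */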
		if (subreq->error == -EAGAIN)
			set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
		else
			set_bit(NETFS_SREQ_FAILED, &subreq->flags);
		trace_netfs_failure(wreq, subreq, transferred_or_error, netfs_fail_write);

		switch (subreq->source) {
		case NETFS_WRITE_TO_CACHE:
			netfs_stat(&netfs_n_wh_write_failed);
			break;
		case NETFS_UPLOAD_TO_SERVER:
			netfs_stat(&netfs_n_wh_upload_failed);
			break;
		default:
			break;
		}
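		/* Ask the issuer to pause; the collector unpauses it once it
		 * has made progress dealing with the failure.
		 */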
		trace_netfs_rreq(wreq, netfs_rreq_trace_set_pause);
		set_bit(NETFS_RREQ_PAUSE, &wreq->flags);
	} else {
		if (WARN(transferred_or_error > subreq->len - subreq->transferred,
			 "Subreq excess write: R=%x[%x] %zd > %zu - %zu",
			 wreq->debug_id, subreq->debug_index,
			 transferred_or_error, subreq->len, subreq->transferred))
			transferred_or_error = subreq->len - subreq->transferred;

		subreq->error = 0;
		subreq->transferred += transferred_or_error;

		if (subreq->transferred < subreq->len)
			set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
	netfs_subreq_clear_in_progress(subreq);
	netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);