// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/scatterlist.h>
#include <linux/part_stat.h>
#include <linux/blk-cgroup.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
#include "blk-throttle.h"

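/*
 * Return the first bvec of @bio as seen from its current iterator, i.e. the
 * data that would be transferred first, taking any partial progress recorded
 * in bi_iter into account.
 */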
static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
{
	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
}

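/*
 * Return the last bvec of @bio as seen from its current iterator. A copy of
 * the iterator is advanced to the end of the bio so that a bio ending in the
 * middle of an io vector reports the truncated length.
 */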
static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
{
	struct bvec_iter iter = bio->bi_iter;
	int idx;

	bio_get_first_bvec(bio, bv);
	if (bv->bv_len == bio->bi_iter.bi_size)
		return;		/* this bio only has a single bvec */

	bio_advance_iter(bio, &iter, iter.bi_size);

	if (!iter.bi_bvec_done)
		idx = iter.bi_idx - 1;
	else	/* in the middle of bvec */
		idx = iter.bi_idx;

	*bv = bio->bi_io_vec[idx];

	/*
	 * iter.bi_bvec_done records actual length of the last bvec
	 * if this bio ends in the middle of one io vector
	 */
	if (iter.bi_bvec_done)
		bv->bv_len = iter.bi_bvec_done;
}

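/*
 * Check whether merging @prev (the last bio of @prev_rq, if given) and @next
 * would leave a gap relative to the queue's virt boundary mask. As a hedged
 * example, with a hypothetical 4KiB boundary (mask 0xfff), a previous bvec
 * that ends unaligned within the boundary cannot be followed by a bvec that
 * starts at a non-zero offset, since the hardware could not see the pair as
 * one contiguous segment.
 */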
static inline bool bio_will_gap(struct request_queue *q,
		struct request *prev_rq, struct bio *prev, struct bio *next)
{
	struct bio_vec pb, nb;

	if (!bio_has_data(prev) || !queue_virt_boundary(q))
		return false;

	/*
	 * Don't merge if the 1st bio starts with non-zero offset, otherwise it
	 * is quite difficult to respect the sg gap limit. We work hard to
	 * merge a huge number of small single bios in case of mkfs.
	 */
	if (prev_rq)
		bio_get_first_bvec(prev_rq->bio, &pb);
	else
		bio_get_first_bvec(prev, &pb);
	if (pb.bv_offset & queue_virt_boundary(q))
		return true;

	/*
	 * We don't need to worry about the situation that the merged segment
	 * ends in unaligned virt boundary:
	 *
	 * - if 'pb' ends aligned, the merged segment ends aligned
	 * - if 'pb' ends unaligned, the next bio must include
	 *   one single bvec of 'nb', otherwise the 'nb' can't
	 *   merge with 'pb'
	 */
	bio_get_last_bvec(prev, &pb);
	bio_get_first_bvec(next, &nb);
	if (biovec_phys_mergeable(q, &pb, &nb))
		return false;
	return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
}

static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
{
	return bio_will_gap(req->q, req, req->biotail, bio);
}

static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
{
	return bio_will_gap(req->q, NULL, bio, req->bio);
}

/*
 * The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
 * is defined as 'unsigned int', and it has to be aligned to the logical
 * block size, which is the minimum accepted unit by hardware.
 */
static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
{
	return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
}

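/*
 * Split a discard bio so that each fragment fits the device's discard limits
 * and the next fragment starts on a discard_granularity boundary. As a hedged
 * example, with a hypothetical granularity of 8 sectors and
 * max_discard_sectors of 1023, the cap is first rounded down to 1016 and the
 * split point is then pulled back so the remainder begins aligned.
 */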
static struct bio *bio_split_discard(struct bio *bio,
				     const struct queue_limits *lim,
				     unsigned *nsegs, struct bio_set *bs)
{
	unsigned int max_discard_sectors, granularity;
	sector_t tmp;
	unsigned split_sectors;

	*nsegs = 1;

	/* Zero-sector (unknown) and one-sector granularities are the same. */
	granularity = max(lim->discard_granularity >> 9, 1U);

	max_discard_sectors =
		min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
	max_discard_sectors -= max_discard_sectors % granularity;

	if (unlikely(!max_discard_sectors)) {
		/* XXX: warn */
		return NULL;
	}

	if (bio_sectors(bio) <= max_discard_sectors)
		return NULL;

	split_sectors = max_discard_sectors;

	/*
	 * If the next starting sector would be misaligned, stop the discard at
	 * the previous aligned sector.
	 */
	tmp = bio->bi_iter.bi_sector + split_sectors -
		((lim->discard_alignment >> 9) % granularity);
	tmp = sector_div(tmp, granularity);

	if (split_sectors > tmp)
		split_sectors -= tmp;

	return bio_split(bio, split_sectors, GFP_NOIO, bs);
}

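/*
 * Split a WRITE_ZEROES bio at the device's max_write_zeroes_sectors limit.
 * These bios carry no data payload, hence *nsegs is reported as zero.
 */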
static struct bio *bio_split_write_zeroes(struct bio *bio,
					  const struct queue_limits *lim,
					  unsigned *nsegs, struct bio_set *bs)
{
	*nsegs = 0;
	if (!lim->max_write_zeroes_sectors)
		return NULL;
	if (bio_sectors(bio) <= lim->max_write_zeroes_sectors)
		return NULL;
	return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
}

/*
 * Return the maximum number of sectors from the start of a bio that may be
 * submitted as a single request to a block device. If enough sectors remain,
 * align the end to the physical block size. Otherwise align the end to the
 * logical block size. This approach minimizes the number of non-aligned
 * requests that are submitted to a block device if the start of a bio is not
 * aligned to a physical block boundary.
 */
static inline unsigned get_max_io_size(struct bio *bio,
				       const struct queue_limits *lim)
{
	unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
	unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
	unsigned max_sectors = lim->max_sectors, start, end;

	if (lim->chunk_sectors) {
		max_sectors = min(max_sectors,
			blk_chunk_sectors_left(bio->bi_iter.bi_sector,
					lim->chunk_sectors));
	}

	start = bio->bi_iter.bi_sector & (pbs - 1);
	end = (start + max_sectors) & ~(pbs - 1);
	if (end > start)
		return end - start;
	return max_sectors & ~(lbs - 1);
}
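
/*
 * As a worked example for get_max_io_size() above, assume a hypothetical
 * 4KiB physical block size (pbs = 8 sectors), max_sectors = 1280 and a bio
 * starting at sector 3: start = 3, end = (3 + 1280) & ~7 = 1280, so up to
 * 1277 sectors may be issued and the request ends physically aligned.
 */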

/**
 * get_max_segment_size() - maximum number of bytes to add as a single segment
 * @lim: Request queue limits.
 * @start_page: Page from which the segment is added.
 * @offset: Offset from @start_page where to add a segment.
 *
 * Returns the maximum number of bytes that can be added as a single segment.
 */
static inline unsigned get_max_segment_size(const struct queue_limits *lim,
		struct page *start_page, unsigned long offset)
{
	unsigned long mask = lim->seg_boundary_mask;

	offset = mask & (page_to_phys(start_page) + offset);

	/*
	 * Prevent an overflow if mask = ULONG_MAX and offset = 0 by adding 1
	 * after having calculated the minimum.
	 */
	return min(mask - offset, (unsigned long)lim->max_segment_size - 1) + 1;
}
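
/*
 * For instance, with mask = ULONG_MAX and offset = 0 the naive form
 * min(mask - offset + 1, max_segment_size) would wrap to 0; computing
 * min(mask - offset, max_segment_size - 1) + 1 instead yields the intended
 * max_segment_size.
 */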

/**
 * bvec_split_segs - verify whether or not a bvec should be split in the middle
 * @lim: [in] queue limits to split based on
 * @bv: [in] bvec to examine
 * @nsegs: [in,out] Number of segments in the bio being built. Incremented
 *	by the number of segments from @bv that may be appended to that
 *	bio without exceeding @max_segs
 * @bytes: [in,out] Number of bytes in the bio being built. Incremented
 *	by the number of bytes from @bv that may be appended to that
 *	bio without exceeding @max_bytes
 * @max_segs: [in] upper bound for *@nsegs
 * @max_bytes: [in] upper bound for *@bytes
 *
 * When splitting a bio, it can happen that a bvec is encountered that is too
 * big to fit in a single segment and hence that it has to be split in the
 * middle. This function verifies whether or not that should happen. The value
 * %true is returned if and only if appending the entire @bv to a bio with
 * *@nsegs segments and *@bytes bytes would make that bio unacceptable for
 * the block driver.
 */
static bool bvec_split_segs(const struct queue_limits *lim,
		const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
		unsigned max_segs, unsigned max_bytes)
{
	unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
	unsigned len = min(bv->bv_len, max_len);
	unsigned total_len = 0;
	unsigned seg_size = 0;

	while (len && *nsegs < max_segs) {
		seg_size = get_max_segment_size(lim, bv->bv_page,
						bv->bv_offset + total_len);
		seg_size = min(seg_size, len);

		(*nsegs)++;
		total_len += seg_size;
		len -= seg_size;

		if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
			break;
	}

	*bytes += total_len;

	/* tell the caller to split the bvec if it is too big to fit */
	return len > 0 || bv->bv_len > max_len;
}

/**
 * bio_split_rw - split a bio in two bios
 * @bio: [in] bio to be split
 * @lim: [in] queue limits to split based on
 * @segs: [out] number of segments in the bio with the first half of the sectors
 * @bs: [in] bio set to allocate the clone from
 * @max_bytes: [in] maximum number of bytes per bio
 *
 * Clone @bio, update the bi_iter of the clone to represent the first sectors
 * of @bio and update @bio->bi_iter to represent the remaining sectors. The
 * following is guaranteed for the cloned bio:
 * - That it has at most @max_bytes worth of data
 * - That it has at most @lim->max_segments segments.
 *
 * Except for discard requests the cloned bio will point at the bi_io_vec of
 * the original bio. It is the responsibility of the caller to ensure that the
 * original bio is not freed before the cloned bio. The caller is also
 * responsible for ensuring that @bs is only destroyed after processing of the
 * split bio has finished.
 */
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
		unsigned *segs, struct bio_set *bs, unsigned max_bytes)
{
	struct bio_vec bv, bvprv, *bvprvp = NULL;
	struct bvec_iter iter;
	unsigned nsegs = 0, bytes = 0;

	bio_for_each_bvec(bv, bio, iter) {
		/*
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, disallow it.
		 */
		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
			goto split;

		if (nsegs < lim->max_segments &&
		    bytes + bv.bv_len <= max_bytes &&
		    bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
			nsegs++;
			bytes += bv.bv_len;
		} else {
			if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
					lim->max_segments, max_bytes))
				goto split;
		}

		bvprv = bv;
		bvprvp = &bvprv;
	}

	*segs = nsegs;
	return NULL;
split:
	/*
	 * We can't sanely support splitting for a REQ_NOWAIT bio. End it
	 * with EAGAIN if splitting is required and return an error pointer.
	 */
	if (bio->bi_opf & REQ_NOWAIT) {
		bio->bi_status = BLK_STS_AGAIN;
		bio_endio(bio);
		return ERR_PTR(-EAGAIN);
	}

	*segs = nsegs;

	/*
	 * Individual bvecs might not be logical block aligned. Round down the
	 * split size so that each bio is properly block size aligned, even if
	 * we do not use the full hardware limits.
	 */
	bytes = ALIGN_DOWN(bytes, lim->logical_block_size);

	/*
	 * Bio splitting may cause subtle trouble such as hang when doing sync
	 * iopoll in direct IO routine. Given that the performance gain of
	 * iopoll for big IO can be trivial, disable iopoll when a split is
	 * needed.
	 */
	bio_clear_polled(bio);
	return bio_split(bio, bytes >> SECTOR_SHIFT, GFP_NOIO, bs);
}
EXPORT_SYMBOL_GPL(bio_split_rw);

/**
 * __bio_split_to_limits - split a bio to fit the queue limits
 * @bio: bio to be split
 * @lim: queue limits to split based on
 * @nr_segs: returns the number of segments in the returned bio
 *
 * Check if @bio needs splitting based on the queue limits, and if so split off
 * a bio fitting the limits from the beginning of @bio and return it. @bio is
 * shortened to the remainder and re-submitted.
 *
 * The split bio is allocated from the per-disk bio_split bio set, which is
 * provided by the block layer.
 */
struct bio *__bio_split_to_limits(struct bio *bio,
				  const struct queue_limits *lim,
				  unsigned int *nr_segs)
{
	struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split;
	struct bio *split;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
		split = bio_split_discard(bio, lim, nr_segs, bs);
		break;
	case REQ_OP_WRITE_ZEROES:
		split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
		break;
	default:
		split = bio_split_rw(bio, lim, nr_segs, bs,
				get_max_io_size(bio, lim) << SECTOR_SHIFT);
		if (IS_ERR(split))
			return NULL;
		break;
	}

	if (split) {
		/* there is no chance to merge the split bio */
		split->bi_opf |= REQ_NOMERGE;

		blkcg_bio_issue_init(split);
		bio_chain(split, bio);
		trace_block_split(split, bio->bi_iter.bi_sector);
		submit_bio_noacct(bio);
		return split;
	}
	return bio;
}

389 | |
390 | /** |
391 | * bio_split_to_limits - split a bio to fit the queue limits |
392 | * @bio: bio to be split |
393 | * |
394 | * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and |
395 | * if so split off a bio fitting the limits from the beginning of @bio and |
396 | * return it. @bio is shortened to the remainder and re-submitted. |
397 | * |
398 | * The split bio is allocated from @q->bio_split, which is provided by the |
399 | * block layer. |
400 | */ |
struct bio *bio_split_to_limits(struct bio *bio)
{
	const struct queue_limits *lim = &bdev_get_queue(bio->bi_bdev)->limits;
	unsigned int nr_segs;

	if (bio_may_exceed_limits(bio, lim))
		return __bio_split_to_limits(bio, lim, &nr_segs);
	return bio;
}
EXPORT_SYMBOL(bio_split_to_limits);

unsigned int blk_recalc_rq_segments(struct request *rq)
{
	unsigned int nr_phys_segs = 0;
	unsigned int bytes = 0;
	struct req_iterator iter;
	struct bio_vec bv;

	if (!rq->bio)
		return 0;

	switch (bio_op(rq->bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
		if (queue_max_discard_segments(rq->q) > 1) {
			struct bio *bio = rq->bio;

			for_each_bio(bio)
				nr_phys_segs++;
			return nr_phys_segs;
		}
		return 1;
	case REQ_OP_WRITE_ZEROES:
		return 0;
	default:
		break;
	}

	rq_for_each_bvec(bv, rq, iter)
		bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
				UINT_MAX, UINT_MAX);
	return nr_phys_segs;
}

static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
		struct scatterlist *sglist)
{
	if (!*sg)
		return sglist;

	/*
	 * If the driver previously mapped a shorter list, we could see a
	 * termination bit prematurely unless it fully inits the sg table
	 * on each mapping. We KNOW that there must be more entries here
	 * or the driver would be buggy, so force clear the termination bit
	 * to avoid doing a full sg_init_table() in drivers for each command.
	 */
	sg_unmark_end(*sg);
	return sg_next(*sg);
}

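/*
 * Map a single (possibly multi-page) bvec to one or more scatterlist entries,
 * capped by the queue's segment size and boundary limits. Offsets are
 * normalized so no sg entry starts beyond PAGE_SIZE: e.g. a bvec with a
 * hypothetical 5KiB bv_offset is presented as the next page with a 1KiB
 * offset.
 */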
static unsigned blk_bvec_map_sg(struct request_queue *q,
		struct bio_vec *bvec, struct scatterlist *sglist,
		struct scatterlist **sg)
{
	unsigned nbytes = bvec->bv_len;
	unsigned nsegs = 0, total = 0;

	while (nbytes > 0) {
		unsigned offset = bvec->bv_offset + total;
		unsigned len = min(get_max_segment_size(&q->limits,
					bvec->bv_page, offset), nbytes);
		struct page *page = bvec->bv_page;

		/*
		 * Unfortunately a fair number of drivers barf on scatterlists
		 * that have an offset larger than PAGE_SIZE, despite other
		 * subsystems dealing with that invariant just fine. For now
		 * stick to the legacy format where we never present those from
		 * the block layer, but the code below should be removed once
		 * these offenders (mostly MMC/SD drivers) are fixed.
		 */
		page += (offset >> PAGE_SHIFT);
		offset &= ~PAGE_MASK;

		*sg = blk_next_sg(sg, sglist);
		sg_set_page(*sg, page, len, offset);

		total += len;
		nbytes -= len;
		nsegs++;
	}

	return nsegs;
}

static inline int __blk_bvec_map_sg(struct bio_vec bv,
		struct scatterlist *sglist, struct scatterlist **sg)
{
	*sg = blk_next_sg(sg, sglist);
	sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
	return 1;
}

/* only try to merge bvecs into one sg if they are from two bios */
static inline bool
__blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec,
			   struct bio_vec *bvprv, struct scatterlist **sg)
{

	int nbytes = bvec->bv_len;

	if (!*sg)
		return false;

	if ((*sg)->length + nbytes > queue_max_segment_size(q))
		return false;

	if (!biovec_phys_mergeable(q, bvprv, bvec))
		return false;

	(*sg)->length += nbytes;

	return true;
}

static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
			     struct scatterlist *sglist,
			     struct scatterlist **sg)
{
	struct bio_vec bvec, bvprv = { NULL };
	struct bvec_iter iter;
	int nsegs = 0;
	bool new_bio = false;

	for_each_bio(bio) {
		bio_for_each_bvec(bvec, bio, iter) {
			/*
			 * Only try to merge bvecs from two bios, given that
			 * bvecs within a bio were already merged when the
			 * pages were added to it.
			 */
			if (new_bio &&
			    __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg))
				goto next_bvec;

			if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE)
				nsegs += __blk_bvec_map_sg(bvec, sglist, sg);
			else
				nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg);
 next_bvec:
			new_bio = false;
		}
		if (likely(bio->bi_iter.bi_size)) {
			bvprv = bvec;
			new_bio = true;
		}
	}

	return nsegs;
}

562 | |
563 | /* |
564 | * map a request to scatterlist, return number of sg entries setup. Caller |
565 | * must make sure sg can hold rq->nr_phys_segments entries |
566 | */ |
567 | int __blk_rq_map_sg(struct request_queue *q, struct request *rq, |
568 | struct scatterlist *sglist, struct scatterlist **last_sg) |
569 | { |
570 | int nsegs = 0; |
571 | |
572 | if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) |
573 | nsegs = __blk_bvec_map_sg(bv: rq->special_vec, sglist, sg: last_sg); |
574 | else if (rq->bio) |
575 | nsegs = __blk_bios_map_sg(q, bio: rq->bio, sglist, sg: last_sg); |
576 | |
577 | if (*last_sg) |
578 | sg_mark_end(sg: *last_sg); |
579 | |
580 | /* |
581 | * Something must have been wrong if the figured number of |
582 | * segment is bigger than number of req's physical segments |
583 | */ |
584 | WARN_ON(nsegs > blk_rq_nr_phys_segments(rq)); |
585 | |
586 | return nsegs; |
587 | } |
588 | EXPORT_SYMBOL(__blk_rq_map_sg); |
589 | |
static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
						  sector_t offset)
{
	struct request_queue *q = rq->q;
	unsigned int max_sectors;

	if (blk_rq_is_passthrough(rq))
		return q->limits.max_hw_sectors;

	max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
	if (!q->limits.chunk_sectors ||
	    req_op(rq) == REQ_OP_DISCARD ||
	    req_op(rq) == REQ_OP_SECURE_ERASE)
		return max_sectors;
	return min(max_sectors,
		   blk_chunk_sectors_left(offset, q->limits.chunk_sectors));
}

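/*
 * Decide whether @bio may be appended to @req as @nr_phys_segs additional
 * hardware segments. Returns 1 and bumps the segment count on success, or 0
 * after marking @req unmergeable when cgroup, integrity or segment-count
 * limits forbid the merge. A discard merge adds no segments and succeeds
 * early.
 */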
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
		unsigned int nr_phys_segs)
{
	if (!blk_cgroup_mergeable(req, bio))
		goto no_merge;

	if (blk_integrity_merge_bio(req->q, req, bio) == false)
		goto no_merge;

	/* discard request merge won't add new segment */
	if (req_op(req) == REQ_OP_DISCARD)
		return 1;

	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
		goto no_merge;

	/*
	 * This will form the start of a new hw segment. Bump both
	 * counters.
	 */
	req->nr_phys_segments += nr_phys_segs;
	return 1;

no_merge:
	req_set_nomerge(req->q, req);
	return 0;
}

int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
{
	if (req_gap_back_merge(req, bio))
		return 0;
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_back_merge(req, bio))
		return 0;
	if (!bio_crypt_ctx_back_mergeable(req, bio))
		return 0;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
		req_set_nomerge(req->q, req);
		return 0;
	}

	return ll_new_hw_segment(req, bio, nr_segs);
}

static int ll_front_merge_fn(struct request *req, struct bio *bio,
		unsigned int nr_segs)
{
	if (req_gap_front_merge(req, bio))
		return 0;
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_front_merge(req, bio))
		return 0;
	if (!bio_crypt_ctx_front_mergeable(req, bio))
		return 0;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
		req_set_nomerge(req->q, req);
		return 0;
	}

	return ll_new_hw_segment(req, bio, nr_segs);
}

static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct request *next)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
	return true;
no_merge:
	req_set_nomerge(q, req);
	return false;
}

static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;

	if (req_gap_back_merge(req, next->bio))
		return 0;

	/*
	 * Will it become too large?
	 */
	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		return 0;

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	if (total_phys_segments > blk_rq_get_max_segments(req))
		return 0;

	if (!blk_cgroup_mergeable(req, next->bio))
		return 0;

	if (blk_integrity_merge_rq(q, req, next) == false)
		return 0;

	if (!bio_crypt_ctx_merge_rq(req, next))
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
	return 1;
}

/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged. Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
void blk_rq_set_mixed_merge(struct request *rq)
{
	blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
	struct bio *bio;

	if (rq->rq_flags & RQF_MIXED_MERGE)
		return;

	/*
	 * @rq will no longer represent mixable attributes for all the
	 * contained bios. It will just track those of the first one.
	 * Distribute the attributes to each bio.
	 */
	for (bio = rq->bio; bio; bio = bio->bi_next) {
		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
		bio->bi_opf |= ff;
	}
	rq->rq_flags |= RQF_MIXED_MERGE;
}

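/*
 * Readahead is best-effort and may legitimately fail fast, so for merge
 * accounting a REQ_RAHEAD bio is treated as if all failfast flags were set.
 */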
static inline blk_opf_t bio_failfast(const struct bio *bio)
{
	if (bio->bi_opf & REQ_RAHEAD)
		return REQ_FAILFAST_MASK;

	return bio->bi_opf & REQ_FAILFAST_MASK;
}

/*
 * After we are marked as MIXED_MERGE, any new RA bio has to be updated
 * as failfast, and the request's failfast has to be updated in case of
 * a front merge.
 */
static inline void blk_update_mixed_merge(struct request *req,
		struct bio *bio, bool front_merge)
{
	if (req->rq_flags & RQF_MIXED_MERGE) {
		if (bio->bi_opf & REQ_RAHEAD)
			bio->bi_opf |= REQ_FAILFAST_MASK;

		if (front_merge) {
			req->cmd_flags &= ~REQ_FAILFAST_MASK;
			req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
		}
	}
}

static void blk_account_io_merge_request(struct request *req)
{
	if (blk_do_io_stat(req)) {
		part_stat_lock();
		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
		part_stat_unlock();
	}
}

static enum elv_merge blk_try_req_merge(struct request *req,
					struct request *next)
{
	if (blk_discard_mergable(req))
		return ELEVATOR_DISCARD_MERGE;
	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
		return ELEVATOR_BACK_MERGE;

	return ELEVATOR_NO_MERGE;
}

/*
 * For non-mq, this has to be called with the request spinlock acquired.
 * For mq with scheduling, the appropriate queue wide lock should be held.
 */
static struct request *attempt_merge(struct request_queue *q,
			struct request *req, struct request *next)
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
		return NULL;

	if (req_op(req) != req_op(next))
		return NULL;

	if (rq_data_dir(req) != rq_data_dir(next))
		return NULL;

	if (req->ioprio != next->ioprio)
		return NULL;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
	 * will have updated segment counts, update sector
	 * counts here. Handle DISCARDs separately, as they
	 * have separate settings.
	 */

	switch (blk_try_req_merge(req, next)) {
	case ELEVATOR_DISCARD_MERGE:
		if (!req_attempt_discard_merge(q, req, next))
			return NULL;
		break;
	case ELEVATOR_BACK_MERGE:
		if (!ll_merge_requests_fn(q, req, next))
			return NULL;
		break;
	default:
		return NULL;
	}

	/*
	 * If failfast settings disagree or any of the two is already
	 * a mixed merge, mark both as mixed before proceeding. This
	 * makes sure that all involved bios have mixable attributes
	 * set properly.
	 */
	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
		blk_rq_set_mixed_merge(req);
		blk_rq_set_mixed_merge(next);
	}

	/*
	 * At this point we have either done a back merge or front merge. We
	 * need the smaller start_time_ns of the merged requests to be the
	 * current request for accounting purposes.
	 */
	if (next->start_time_ns < req->start_time_ns)
		req->start_time_ns = next->start_time_ns;

	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->__data_len += blk_rq_bytes(next);

	if (!blk_discard_mergable(req))
		elv_merge_requests(q, req, next);

	blk_crypto_rq_put_keyslot(next);

	/*
	 * 'next' is going away, so update stats accordingly
	 */
	blk_account_io_merge_request(next);

	trace_block_rq_merge(next);

	/*
	 * ownership of bio passed from next to req, return 'next' for
	 * the caller to free
	 */
	next->bio = NULL;
	return next;
}

static struct request *attempt_back_merge(struct request_queue *q,
		struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	if (next)
		return attempt_merge(q, rq, next);

	return NULL;
}

static struct request *attempt_front_merge(struct request_queue *q,
		struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	if (prev)
		return attempt_merge(q, prev, rq);

	return NULL;
}

/*
 * Try to merge 'next' into 'rq'. Return true if the merge happened, false
 * otherwise. The caller is responsible for freeing 'next' if the merge
 * happened.
 */
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
			   struct request *next)
{
	return attempt_merge(q, rq, next);
}

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq) || !bio_mergeable(bio))
		return false;

	if (req_op(rq) != bio_op(bio))
		return false;

	/* different data direction or already started, don't merge */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return false;

	/* don't merge across cgroup boundaries */
	if (!blk_cgroup_mergeable(rq, bio))
		return false;

	/* only merge an integrity-protected bio into a likewise protected rq */
	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
		return false;

	/* Only merge if the crypt contexts are compatible */
	if (!bio_crypt_rq_ctx_compatible(rq, bio))
		return false;

	if (rq->ioprio != bio_prio(bio))
		return false;

	return true;
}

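/*
 * Classify how @bio could merge with @rq: a back merge if the bio starts
 * right where the request ends, a front merge if the bio ends right where
 * the request begins, and a discard merge whenever the request is
 * multi-range discard mergeable, regardless of position.
 */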
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
{
	if (blk_discard_mergable(rq))
		return ELEVATOR_DISCARD_MERGE;
	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
		return ELEVATOR_BACK_MERGE;
	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
		return ELEVATOR_FRONT_MERGE;
	return ELEVATOR_NO_MERGE;
}

static void blk_account_io_merge_bio(struct request *req)
{
	if (!blk_do_io_stat(req))
		return;

	part_stat_lock();
	part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
	part_stat_unlock();
}

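/*
 * Tri-state merge result: BIO_MERGE_OK means the bio was merged into an
 * existing request, BIO_MERGE_NONE that the request was not a candidate at
 * all, and BIO_MERGE_FAILED that a positional match was found but the limits
 * rejected the merge.
 */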
enum bio_merge_status {
	BIO_MERGE_OK,
	BIO_MERGE_NONE,
	BIO_MERGE_FAILED,
};

static enum bio_merge_status bio_attempt_back_merge(struct request *req,
		struct bio *bio, unsigned int nr_segs)
{
	const blk_opf_t ff = bio_failfast(bio);

	if (!ll_back_merge_fn(req, bio, nr_segs))
		return BIO_MERGE_FAILED;

	trace_block_bio_backmerge(bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	blk_update_mixed_merge(req, bio, false);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;

	bio_crypt_free_ctx(bio);

	blk_account_io_merge_bio(req);
	return BIO_MERGE_OK;
}

static enum bio_merge_status bio_attempt_front_merge(struct request *req,
		struct bio *bio, unsigned int nr_segs)
{
	const blk_opf_t ff = bio_failfast(bio);

	if (!ll_front_merge_fn(req, bio, nr_segs))
		return BIO_MERGE_FAILED;

	trace_block_bio_frontmerge(bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	blk_update_mixed_merge(req, bio, true);

	bio->bi_next = req->bio;
	req->bio = bio;

	req->__sector = bio->bi_iter.bi_sector;
	req->__data_len += bio->bi_iter.bi_size;

	bio_crypt_do_front_merge(req, bio);

	blk_account_io_merge_bio(req);
	return BIO_MERGE_OK;
}

static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
		struct request *req, struct bio *bio)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	rq_qos_merge(q, req, bio);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;
	req->nr_phys_segments = segments + 1;

	blk_account_io_merge_bio(req);
	return BIO_MERGE_OK;
no_merge:
	req_set_nomerge(q, req);
	return BIO_MERGE_FAILED;
}

static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
						   struct request *rq,
						   struct bio *bio,
						   unsigned int nr_segs,
						   bool sched_allow_merge)
{
	if (!blk_rq_merge_ok(rq, bio))
		return BIO_MERGE_NONE;

	switch (blk_try_merge(rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
			return bio_attempt_back_merge(rq, bio, nr_segs);
		break;
	case ELEVATOR_FRONT_MERGE:
		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
			return bio_attempt_front_merge(rq, bio, nr_segs);
		break;
	case ELEVATOR_DISCARD_MERGE:
		return bio_attempt_discard_merge(q, rq, bio);
	default:
		return BIO_MERGE_NONE;
	}

	return BIO_MERGE_FAILED;
}

/**
 * blk_attempt_plug_merge - try to merge with %current's plugged list
 * @q: request_queue new bio is being queued at
 * @bio: new bio being queued
 * @nr_segs: number of segments in @bio
 *
 * Determine whether @bio being queued on @q can be merged with the previous
 * request on %current's plugged list. Returns %true if merge was successful,
 * otherwise %false.
 *
 * Plugging coalesces IOs from the same issuer for the same purpose without
 * going through @q->queue_lock. As such it's more of an issuing mechanism
 * than scheduling, and the request, while it may have elvpriv data, is not
 * added to the elevator at this point. In addition, we don't have
 * reliable access to the elevator outside queue lock. Only check basic
 * merging parameters without querying the elevator.
 *
 * Caller must ensure !blk_queue_nomerges(q) beforehand.
 */
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs)
{
	struct blk_plug *plug;
	struct request *rq;

	plug = blk_mq_plug(bio);
	if (!plug || rq_list_empty(plug->mq_list))
		return false;

	rq_list_for_each(&plug->mq_list, rq) {
		if (rq->q == q) {
			if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
			    BIO_MERGE_OK)
				return true;
			break;
		}

		/*
		 * Only keep iterating plug list for merges if we have multiple
		 * queues
		 */
		if (!plug->multiple_queues)
			break;
	}
	return false;
}

/*
 * Iterate list of requests and see if we can merge this bio with any
 * of them.
 */
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
			struct bio *bio, unsigned int nr_segs)
{
	struct request *rq;
	int checked = 8;

	list_for_each_entry_reverse(rq, list, queuelist) {
		if (!checked--)
			break;

		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
		case BIO_MERGE_NONE:
			continue;
		case BIO_MERGE_OK:
			return true;
		case BIO_MERGE_FAILED:
			return false;
		}
	}

	return false;
}
EXPORT_SYMBOL_GPL(blk_bio_list_merge);

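/*
 * Ask the elevator for a merge candidate for @bio and attempt the bio merge;
 * on success, also try to collapse the now-adjacent neighbouring request into
 * the merged one, returning it via @merged_request so the caller can free it.
 */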
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs, struct request **merged_request)
{
	struct request *rq;

	switch (elv_merge(q, &rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
			return false;
		*merged_request = attempt_back_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
		return true;
	case ELEVATOR_FRONT_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
			return false;
		*merged_request = attempt_front_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
		return true;
	case ELEVATOR_DISCARD_MERGE:
		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
