blk-map.c source code [linux/block/blk-map.c]

// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

14	struct bio_map_data {
15	bool is_our_pages : `1`;
16	bool is_null_mapped : `1`;
17	struct iov_iter iter;
18	struct iovec iov[];
19	};
20
21	static struct bio_map_data bio_alloc_map_data(struct* iov_iter *data,
22	gfp_t gfp_mask)
23	{
24	struct bio_map_data *bmd;
25
26	if (data->nr_segs > UIO_MAXIOV)
27	return NULL;
28
29	bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
30	if (!bmd)
31	return NULL;
32	bmd->iter = *data;
33	if (iter_is_iovec(i: data)) {
34	memcpy(bmd->iov, iter_iov(data), sizeof(struct iovec) * data->nr_segs);
35	bmd->iter.__iov = bmd->iov;
36	}
37	return bmd;
38	}
39
40	/**
41	* bio_copy_from_iter - copy all pages from iov_iter to bio
42	* @bio: The &struct bio which describes the I/O as destination
43	* @iter: iov_iter as source
44	*
45	* Copy all pages from iov_iter to bio.
46	* Returns 0 on success, or error on failure.
47	*/
48	static int bio_copy_from_iter(struct bio bio, struct* iov_iter *iter)
49	{
50	struct bio_vec *bvec;
51	struct bvec_iter_all iter_all;
52
53	bio_for_each_segment_all(bvec, bio, iter_all) {
54	ssize_t ret;
55
56	ret = copy_page_from_iter(page: bvec->bv_page,
57	offset: bvec->bv_offset,
58	bytes: bvec->bv_len,
59	i: iter);
60
61	if (!iov_iter_count(i: iter))
62	break;
63
64	if (ret < bvec->bv_len)
65	return -EFAULT;
66	}
67
68	return `0`;
69	}
70
71	/**
72	* bio_copy_to_iter - copy all pages from bio to iov_iter
73	* @bio: The &struct bio which describes the I/O as source
74	* @iter: iov_iter as destination
75	*
76	* Copy all pages from bio to iov_iter.
77	* Returns 0 on success, or error on failure.
78	*/
79	static int bio_copy_to_iter(struct bio bio, struct* iov_iter iter)
80	{
81	struct bio_vec *bvec;
82	struct bvec_iter_all iter_all;
83
84	bio_for_each_segment_all(bvec, bio, iter_all) {
85	ssize_t ret;
86
87	ret = copy_page_to_iter(page: bvec->bv_page,
88	offset: bvec->bv_offset,
89	bytes: bvec->bv_len,
90	i: &iter);
91
92	if (!iov_iter_count(i: &iter))
93	break;
94
95	if (ret < bvec->bv_len)
96	return -EFAULT;
97	}
98
99	return `0`;
100	}
101
102	/**
103	* bio_uncopy_user - finish previously mapped bio
104	* @bio: bio being terminated
105	*
106	* Free pages allocated from bio_copy_user_iov() and write back data
107	* to user space in case of a read.
108	*/
109	static int bio_uncopy_user(struct bio *bio)
110	{
111	struct bio_map_data *bmd = bio->bi_private;
112	int ret = `0`;
113
114	if (!bmd->is_null_mapped) {
115	/*
116	* if we're in a workqueue, the request is orphaned, so
117	* don't copy into a random user address space, just free
118	* and return -EINTR so user space doesn't expect any data.
119	*/
120	if (!current->mm)
121	ret = -EINTR;
122	else if (bio_data_dir(bio) == READ)
123	ret = bio_copy_to_iter(bio, iter: bmd->iter);
124	if (bmd->is_our_pages)
125	bio_free_pages(bio);
126	}
127	kfree(objp: bmd);
128	return ret;
129	}
130
131	static int bio_copy_user_iov(struct request rq, struct* rq_map_data *map_data,
132	struct iov_iter *iter, gfp_t gfp_mask)
133	{
134	struct bio_map_data *bmd;
135	struct page *page;
136	struct bio *bio;
137	int i = `0`, ret;
138	int nr_pages;
139	unsigned int len = iter->count;
140	unsigned int offset = map_data ? offset_in_page(map_data->offset) : `0`;
141
142	bmd = bio_alloc_map_data(data: iter, gfp_mask);
143	if (!bmd)
144	return -ENOMEM;
145
146	/*
147	* We need to do a deep copy of the iov_iter including the iovecs.
148	* The caller provided iov might point to an on-stack or otherwise
149	* shortlived one.
150	*/
151	bmd->is_our_pages = !map_data;
152	bmd->is_null_mapped = (map_data && map_data->null_mapped);
153
154	nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
155
156	ret = -ENOMEM;
157	bio = bio_kmalloc(nr_vecs: nr_pages, gfp_mask);
158	if (!bio)
159	goto out_bmd;
160	bio_init(bio, NULL, table: bio->bi_inline_vecs, max_vecs: nr_pages, opf: req_op(req: rq));
161
162	if (map_data) {
163	nr_pages = `1U` << map_data->page_order;
164	i = map_data->offset / PAGE_SIZE;
165	}
166	while (len) {
167	unsigned int bytes = PAGE_SIZE;
168
169	bytes -= offset;
170
171	if (bytes > len)
172	bytes = len;
173
174	if (map_data) {
175	if (i == map_data->nr_entries * nr_pages) {
176	ret = -ENOMEM;
177	goto cleanup;
178	}
179
180	page = map_data->pages[i / nr_pages];
181	page += (i % nr_pages);
182
183	i++;
184	} else {
185	page = alloc_page(GFP_NOIO \| gfp_mask);
186	if (!page) {
187	ret = -ENOMEM;
188	goto cleanup;
189	}
190	}
191
192	if (bio_add_page(bio, page, len: bytes, off: offset) < bytes) {
193	if (!map_data)
194	__free_page(page);
195	break;
196	}
197
198	len -= bytes;
199	offset = `0`;
200	}
201
202	if (map_data)
203	map_data->offset += bio->bi_iter.bi_size;
204
205	/*
206	* success
207	*/
208	if (iov_iter_rw(i: iter) == WRITE &&
209	(!map_data \|\| !map_data->null_mapped)) {
210	ret = bio_copy_from_iter(bio, iter);
211	if (ret)
212	goto cleanup;
213	} else if (map_data && map_data->from_user) {
214	struct iov_iter iter2 = *iter;
215
216	/ This is the copy-in part of SG_DXFER_TO_FROM_DEV. /
217	iter2.data_source = ITER_SOURCE;
218	ret = bio_copy_from_iter(bio, iter: &iter2);
219	if (ret)
220	goto cleanup;
221	} else {
222	if (bmd->is_our_pages)
223	zero_fill_bio(bio);
224	iov_iter_advance(i: iter, bytes: bio->bi_iter.bi_size);
225	}
226
227	bio->bi_private = bmd;
228
229	ret = blk_rq_append_bio(rq, bio);
230	if (ret)
231	goto cleanup;
232	return `0`;
233	cleanup:
234	if (!map_data)
235	bio_free_pages(bio);
236	bio_uninit(bio);
237	kfree(objp: bio);
238	out_bmd:
239	kfree(objp: bmd);
240	return ret;
241	}
242
243	static void blk_mq_map_bio_put(struct bio *bio)
244	{
245	if (bio->bi_opf & REQ_ALLOC_CACHE) {
246	bio_put(bio);
247	} else {
248	bio_uninit(bio);
249	kfree(objp: bio);
250	}
251	}
252
253	static struct bio blk_rq_map_bio_alloc(struct* request *rq,
254	unsigned int nr_vecs, gfp_t gfp_mask)
255	{
256	struct bio *bio;
257
258	if (rq->cmd_flags & REQ_ALLOC_CACHE && (nr_vecs <= BIO_INLINE_VECS)) {
259	bio = bio_alloc_bioset(NULL, nr_vecs, opf: rq->cmd_flags, gfp_mask,
260	bs: &fs_bio_set);
261	if (!bio)
262	return NULL;
263	} else {
264	bio = bio_kmalloc(nr_vecs, gfp_mask);
265	if (!bio)
266	return NULL;
267	bio_init(bio, NULL, table: bio->bi_inline_vecs, max_vecs: nr_vecs, opf: req_op(req: rq));
268	}
269	return bio;
270	}
271
272	static int bio_map_user_iov(struct request rq, struct* iov_iter *iter,
273	gfp_t gfp_mask)
274	{
275	unsigned int nr_vecs = iov_iter_npages(i: iter, BIO_MAX_VECS);
276	struct bio *bio;
277	int ret;
278
279	if (!iov_iter_count(i: iter))
280	return -EINVAL;
281
282	bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
283	if (!bio)
284	return -ENOMEM;
285	ret = bio_iov_iter_get_pages(bio, iter);
286	if (ret)
287	goto out_put;
288	ret = blk_rq_append_bio(rq, bio);
289	if (ret)
290	goto out_release;
291	return `0`;
292
293	out_release:
294	bio_release_pages(bio, mark_dirty: false);
295	out_put:
296	blk_mq_map_bio_put(bio);
297	return ret;
298	}
299
/*
 * On architectures that define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE,
 * invalidate the kernel vmap range behind a completed non-write bio.
 * bio->bi_private holds the start address (bio_map_kern() sets it only for
 * vmalloc buffers) and the length is the sum of all bvec lengths.
 * Compiles to a no-op elsewhere.
 */
static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE
	if (bio->bi_private && !op_is_write(bio_op(bio))) {
		unsigned long i, len = 0;

		for (i = 0; i < bio->bi_vcnt; i++)
			len += bio->bi_io_vec[i].bv_len;
		invalidate_kernel_vmap_range(bio->bi_private, len);
	}
#endif
}

/*
 * Completion handler for bios built by bio_map_kern(): invalidate the vmap
 * range if applicable, then free the kmalloc'ed bio.
 */
static void bio_map_kern_endio(struct bio *bio)
{
	bio_invalidate_vmalloc_pages(bio);
	bio_uninit(bio);
	kfree(bio);
}

320	static struct bio bio_map_kern(void* data, unsigned* int len, enum req_op op,
321	gfp_t gfp_mask)
322	{
323	unsigned int nr_vecs = bio_add_max_vecs(kaddr: data, len);
324	struct bio *bio;
325
326	bio = bio_kmalloc(nr_vecs, gfp_mask);
327	if (!bio)
328	return ERR_PTR(error: -ENOMEM);
329	bio_init(bio, NULL, table: bio->bi_inline_vecs, max_vecs: nr_vecs, opf: op);
330	if (is_vmalloc_addr(x: data)) {
331	bio->bi_private = data;
332	if (!bio_add_vmalloc(bio, vaddr: data, len)) {
333	bio_uninit(bio);
334	kfree(objp: bio);
335	return ERR_PTR(error: -EINVAL);
336	}
337	} else {
338	bio_add_virt_nofail(bio, vaddr: data, len);
339	}
340	bio->bi_end_io = bio_map_kern_endio;
341	return bio;
342	}
343
/*
 * Completion handler for bounce bios built by bio_copy_kern(): free the
 * bounce pages and then the kmalloc'ed bio itself.
 */
static void bio_copy_kern_endio(struct bio *bio)
{
	bio_free_pages(bio);
	bio_uninit(bio);
	kfree(bio);
}

351	static void bio_copy_kern_endio_read(struct bio *bio)
352	{
353	char *p = bio->bi_private;
354	struct bio_vec *bvec;
355	struct bvec_iter_all iter_all;
356
357	bio_for_each_segment_all(bvec, bio, iter_all) {
358	memcpy_from_bvec(to: p, bvec);
359	p += bvec->bv_len;
360	}
361
362	bio_copy_kern_endio(bio);
363	}
364
365	/**
366	* bio_copy_kern - copy kernel address into bio
367	* @data: pointer to buffer to copy
368	* @len: length in bytes
369	* @op: bio/request operation
370	* @gfp_mask: allocation flags for bio and page allocation
371	*
372	* copy the kernel address into a bio suitable for io to a block
373	* device. Returns an error pointer in case of error.
374	*/
375	static struct bio bio_copy_kern(void* data, unsigned* int len, enum req_op op,
376	gfp_t gfp_mask)
377	{
378	unsigned long kaddr = (unsigned long)data;
379	unsigned long end = (kaddr + len + PAGE_SIZE - `1`) >> PAGE_SHIFT;
380	unsigned long start = kaddr >> PAGE_SHIFT;
381	struct bio *bio;
382	void *p = data;
383	int nr_pages = `0`;
384
385	/*
386	* Overflow, abort
387	*/
388	if (end < start)
389	return ERR_PTR(error: -EINVAL);
390
391	nr_pages = end - start;
392	bio = bio_kmalloc(nr_vecs: nr_pages, gfp_mask);
393	if (!bio)
394	return ERR_PTR(error: -ENOMEM);
395	bio_init(bio, NULL, table: bio->bi_inline_vecs, max_vecs: nr_pages, opf: op);
396
397	while (len) {
398	struct page *page;
399	unsigned int bytes = PAGE_SIZE;
400
401	if (bytes > len)
402	bytes = len;
403
404	page = alloc_page(GFP_NOIO \| __GFP_ZERO \| gfp_mask);
405	if (!page)
406	goto cleanup;
407
408	if (op_is_write(op))
409	memcpy(page_address(page), p, bytes);
410
411	if (bio_add_page(bio, page, len: bytes, off: `0`) < bytes)
412	break;
413
414	len -= bytes;
415	p += bytes;
416	}
417
418	if (op_is_write(op)) {
419	bio->bi_end_io = bio_copy_kern_endio;
420	} else {
421	bio->bi_end_io = bio_copy_kern_endio_read;
422	bio->bi_private = data;
423	}
424
425	return bio;
426
427	cleanup:
428	bio_free_pages(bio);
429	bio_uninit(bio);
430	kfree(objp: bio);
431	return ERR_PTR(error: -ENOMEM);
432	}
433
434	/*
435	* Append a bio to a passthrough request. Only works if the bio can be merged
436	* into the request based on the driver constraints.
437	*/
438	int blk_rq_append_bio(struct request rq, struct* bio *bio)
439	{
440	const struct queue_limits *lim = &rq->q->limits;
441	unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
442	unsigned int nr_segs = `0`;
443	int ret;
444
445	/ check that the data layout matches the hardware restrictions /
446	ret = bio_split_rw_at(bio, lim, segs: &nr_segs, max_bytes);
447	if (ret) {
448	/ if we would have to split the bio, copy instead /
449	if (ret > `0`)
450	ret = -EREMOTEIO;
451	return ret;
452	}
453
454	if (rq->bio) {
455	if (!ll_back_merge_fn(req: rq, bio, nr_segs))
456	return -EINVAL;
457	rq->biotail->bi_next = bio;
458	rq->biotail = bio;
459	rq->__data_len += bio->bi_iter.bi_size;
460	bio_crypt_free_ctx(bio);
461	return `0`;
462	}
463
464	rq->nr_phys_segments = nr_segs;
465	rq->bio = rq->biotail = bio;
466	rq->__data_len = bio->bi_iter.bi_size;
467	return `0`;
468	}
469	EXPORT_SYMBOL(blk_rq_append_bio);
470
471	/ Prepare bio for passthrough IO given ITER_BVEC iter /
472	static int blk_rq_map_user_bvec(struct request rq, const* struct iov_iter *iter)
473	{
474	unsigned int max_bytes = rq->q->limits.max_hw_sectors << SECTOR_SHIFT;
475	struct bio *bio;
476	int ret;
477
478	if (!iov_iter_count(i: iter) \|\| iov_iter_count(i: iter) > max_bytes)
479	return -EINVAL;
480
481	/ reuse the bvecs from the iterator instead of allocating new ones /
482	bio = blk_rq_map_bio_alloc(rq, nr_vecs: `0`, GFP_KERNEL);
483	if (!bio)
484	return -ENOMEM;
485	bio_iov_bvec_set(bio, iter);
486
487	ret = blk_rq_append_bio(rq, bio);
488	if (ret)
489	blk_mq_map_bio_put(bio);
490	return ret;
491	}
492
493	/**
494	* blk_rq_map_user_iov - map user data to a request, for passthrough requests
495	* @q: request queue where request should be inserted
496	* @rq: request to map data to
497	* @map_data: pointer to the rq_map_data holding pages (if necessary)
498	* @iter: iovec iterator
499	* @gfp_mask: memory allocation flags
500	*
501	* Description:
502	* Data will be mapped directly for zero copy I/O, if possible. Otherwise
503	* a kernel bounce buffer is used.
504	*
505	* A matching blk_rq_unmap_user() must be issued at the end of I/O, while
506	* still in process context.
507	*/
508	int blk_rq_map_user_iov(struct request_queue q, struct* request *rq,
509	struct rq_map_data *map_data,
510	const struct iov_iter *iter, gfp_t gfp_mask)
511	{
512	bool copy = false, map_bvec = false;
513	unsigned long align = blk_lim_dma_alignment_and_pad(lim: &q->limits);
514	struct bio *bio = NULL;
515	struct iov_iter i;
516	int ret = -EINVAL;
517
518	if (map_data)
519	copy = true;
520	else if (iov_iter_alignment(i: iter) & align)
521	copy = true;
522	else if (iov_iter_is_bvec(i: iter))
523	map_bvec = true;
524	else if (!user_backed_iter(i: iter))
525	copy = true;
526	else if (queue_virt_boundary(q))
527	copy = queue_virt_boundary(q) & iov_iter_gap_alignment(i: iter);
528
529	if (map_bvec) {
530	ret = blk_rq_map_user_bvec(rq, iter);
531	if (!ret)
532	return `0`;
533	if (ret != -EREMOTEIO)
534	goto fail;
535	/ fall back to copying the data on limits mismatches /
536	copy = true;
537	}
538
539	i = *iter;
540	do {
541	if (copy)
542	ret = bio_copy_user_iov(rq, map_data, iter: &i, gfp_mask);
543	else
544	ret = bio_map_user_iov(rq, iter: &i, gfp_mask);
545	if (ret) {
546	if (ret == -EREMOTEIO)
547	ret = -EINVAL;
548	goto unmap_rq;
549	}
550	if (!bio)
551	bio = rq->bio;
552	} while (iov_iter_count(i: &i));
553
554	return `0`;
555
556	unmap_rq:
557	blk_rq_unmap_user(bio);
558	fail:
559	rq->bio = NULL;
560	return ret;
561	}
562	EXPORT_SYMBOL(blk_rq_map_user_iov);
563
564	int blk_rq_map_user(struct request_queue q, struct* request *rq,
565	struct rq_map_data map_data, void* __user *ubuf,
566	unsigned long len, gfp_t gfp_mask)
567	{
568	struct iov_iter i;
569	int ret = import_ubuf(rq_data_dir(rq), buf: ubuf, len, i: &i);
570
571	if (unlikely(ret < `0`))
572	return ret;
573
574	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
575	}
576	EXPORT_SYMBOL(blk_rq_map_user);
577
578	int blk_rq_map_user_io(struct request req, struct* rq_map_data *map_data,
579	void __user ubuf, unsigned* long buf_len, gfp_t gfp_mask,
580	bool vec, int iov_count, bool check_iter_count, int rw)
581	{
582	int ret = `0`;
583
584	if (vec) {
585	struct iovec fast_iov[UIO_FASTIOV];
586	struct iovec *iov = fast_iov;
587	struct iov_iter iter;
588
589	ret = import_iovec(type: rw, uvec: ubuf, nr_segs: iov_count ? iov_count : buf_len,
590	UIO_FASTIOV, iovp: &iov, i: &iter);
591	if (ret < `0`)
592	return ret;
593
594	if (iov_count) {
595	/ SG_IO howto says that the shorter of the two wins /
596	iov_iter_truncate(i: &iter, count: buf_len);
597	if (check_iter_count && !iov_iter_count(i: &iter)) {
598	kfree(objp: iov);
599	return -EINVAL;
600	}
601	}
602
603	ret = blk_rq_map_user_iov(req->q, req, map_data, &iter,
604	gfp_mask);
605	kfree(objp: iov);
606	} else if (buf_len) {
607	ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len,
608	gfp_mask);
609	}
610	return ret;
611	}
612	EXPORT_SYMBOL(blk_rq_map_user_io);
613
614	/**
615	* blk_rq_unmap_user - unmap a request with user data
616	* @bio: start of bio list
617	*
618	* Description:
619	* Unmap a rq previously mapped by blk_rq_map_user(). The caller must
620	* supply the original rq->bio from the blk_rq_map_user() return, since
621	* the I/O completion may have changed rq->bio.
622	*/
623	int blk_rq_unmap_user(struct bio *bio)
624	{
625	struct bio *next_bio;
626	int ret = `0`, ret2;
627
628	while (bio) {
629	if (bio->bi_private) {
630	ret2 = bio_uncopy_user(bio);
631	if (ret2 && !ret)
632	ret = ret2;
633	} else {
634	bio_release_pages(bio, bio_data_dir(bio) == READ);
635	}
636
637	if (bio_integrity(bio))
638	bio_integrity_unmap_user(bio);
639
640	next_bio = bio;
641	bio = bio->bi_next;
642	blk_mq_map_bio_put(bio: next_bio);
643	}
644
645	return ret;
646	}
647	EXPORT_SYMBOL(blk_rq_unmap_user);
648
649	/**
650	* blk_rq_map_kern - map kernel data to a request, for passthrough requests
651	* @rq: request to fill
652	* @kbuf: the kernel buffer
653	* @len: length of user data
654	* @gfp_mask: memory allocation flags
655	*
656	* Description:
657	* Data will be mapped directly if possible. Otherwise a bounce
658	* buffer is used. Can be called multiple times to append multiple
659	* buffers.
660	*/
661	int blk_rq_map_kern(struct request rq, void* kbuf, unsigned* int len,
662	gfp_t gfp_mask)
663	{
664	unsigned long addr = (unsigned long) kbuf;
665	struct bio *bio;
666	int ret;
667
668	if (len > (queue_max_hw_sectors(q: rq->q) << SECTOR_SHIFT))
669	return -EINVAL;
670	if (!len \|\| !kbuf)
671	return -EINVAL;
672
673	if (!blk_rq_aligned(q: rq->q, addr, len) \|\| object_is_on_stack(obj: kbuf))
674	bio = bio_copy_kern(data: kbuf, len, op: req_op(req: rq), gfp_mask);
675	else
676	bio = bio_map_kern(data: kbuf, len, op: req_op(req: rq), gfp_mask);
677
678	if (IS_ERR(ptr: bio))
679	return PTR_ERR(ptr: bio);
680
681	ret = blk_rq_append_bio(rq, bio);
682	if (unlikely(ret)) {
683	bio_uninit(bio);
684	kfree(objp: bio);
685	}
686	return ret;
687	}
688	EXPORT_SYMBOL(blk_rq_map_kern);
689

source code of linux/block/blk-map.c