1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Contiguous Memory Allocator |
4 | * |
5 | * Copyright (c) 2010-2011 by Samsung Electronics. |
6 | * Copyright IBM Corporation, 2013 |
7 | * Copyright LG Electronics Inc., 2014 |
8 | * Written by: |
9 | * Marek Szyprowski <m.szyprowski@samsung.com> |
10 | * Michal Nazarewicz <mina86@mina86.com> |
11 | * Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> |
12 | * Joonsoo Kim <iamjoonsoo.kim@lge.com> |
13 | */ |
14 | |
15 | #define pr_fmt(fmt) "cma: " fmt |
16 | |
17 | #define CREATE_TRACE_POINTS |
18 | |
19 | #include <linux/memblock.h> |
20 | #include <linux/err.h> |
21 | #include <linux/list.h> |
22 | #include <linux/mm.h> |
23 | #include <linux/sizes.h> |
24 | #include <linux/slab.h> |
25 | #include <linux/log2.h> |
26 | #include <linux/cma.h> |
27 | #include <linux/highmem.h> |
28 | #include <linux/io.h> |
29 | #include <linux/kmemleak.h> |
30 | #include <trace/events/cma.h> |
31 | |
32 | #include "internal.h" |
33 | #include "cma.h" |
34 | |
35 | struct cma cma_areas[MAX_CMA_AREAS]; |
36 | unsigned int cma_area_count; |
37 | |
38 | static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, |
39 | phys_addr_t size, phys_addr_t limit, |
40 | phys_addr_t alignment, unsigned int order_per_bit, |
41 | bool fixed, const char *name, struct cma **res_cma, |
42 | int nid); |
43 | |
44 | phys_addr_t cma_get_base(const struct cma *cma) |
45 | { |
46 | WARN_ON_ONCE(cma->nranges != 1); |
47 | return PFN_PHYS(cma->ranges[0].base_pfn); |
48 | } |
49 | |
50 | unsigned long cma_get_size(const struct cma *cma) |
51 | { |
52 | return cma->count << PAGE_SHIFT; |
53 | } |
54 | |
55 | const char *cma_get_name(const struct cma *cma) |
56 | { |
57 | return cma->name; |
58 | } |
59 | |
60 | static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, |
61 | unsigned int align_order) |
62 | { |
63 | if (align_order <= cma->order_per_bit) |
64 | return 0; |
65 | return (1UL << (align_order - cma->order_per_bit)) - 1; |
66 | } |
67 | |
68 | /* |
69 | * Find the offset of the base PFN from the specified align_order. |
70 | * The value returned is represented in order_per_bits. |
71 | */ |
72 | static unsigned long cma_bitmap_aligned_offset(const struct cma *cma, |
73 | const struct cma_memrange *cmr, |
74 | unsigned int align_order) |
75 | { |
76 | return (cmr->base_pfn & ((1UL << align_order) - 1)) |
77 | >> cma->order_per_bit; |
78 | } |
79 | |
80 | static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma, |
81 | unsigned long pages) |
82 | { |
83 | return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; |
84 | } |
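/*
 * Worked example for the three bitmap helpers above (illustrative
 * numbers only, not taken from any particular configuration): with
 * order_per_bit == 0 and align_order == 2, cma_bitmap_aligned_mask()
 * returns (1 << 2) - 1 == 3, forcing bitmap searches onto 4-page
 * boundaries. With base_pfn == 0x12345 and align_order == 8,
 * cma_bitmap_aligned_offset() returns 0x12345 & 0xff == 0x45, the
 * extra offset needed because the range itself does not start on a
 * 256-page boundary. With order_per_bit == 1,
 * cma_bitmap_pages_to_bits(cma, 5) rounds 5 pages up to 6 and
 * returns 3 bits.
 */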
85 | |
86 | static void cma_clear_bitmap(struct cma *cma, const struct cma_memrange *cmr, |
87 | unsigned long pfn, unsigned long count) |
88 | { |
89 | unsigned long bitmap_no, bitmap_count; |
90 | unsigned long flags; |
91 | |
92 | bitmap_no = (pfn - cmr->base_pfn) >> cma->order_per_bit; |
93 | bitmap_count = cma_bitmap_pages_to_bits(cma, count); |
94 | |
95 | spin_lock_irqsave(&cma->lock, flags); |
96 | bitmap_clear(cmr->bitmap, bitmap_no, bitmap_count); |
97 | cma->available_count += count; |
98 | spin_unlock_irqrestore(&cma->lock, flags); |
99 | } |
100 | |
101 | /* |
102 | * Check if a CMA area contains no ranges that intersect with |
103 | * multiple zones. Store the result in the flags in case |
104 | * this gets called more than once. |
105 | */ |
106 | bool cma_validate_zones(struct cma *cma) |
107 | { |
108 | int r; |
109 | unsigned long base_pfn; |
110 | struct cma_memrange *cmr; |
111 | bool valid_bit_set; |
112 | |
113 | /* |
114 | * If already validated, return result of previous check. |
115 | * Either the valid or invalid bit will be set if this |
116 | * check has already been done. If neither is set, the |
117 | * check has not been performed yet. |
118 | */ |
119 | valid_bit_set = test_bit(CMA_ZONES_VALID, &cma->flags); |
120 | if (valid_bit_set || test_bit(CMA_ZONES_INVALID, &cma->flags)) |
121 | return valid_bit_set; |
122 | |
123 | for (r = 0; r < cma->nranges; r++) { |
124 | cmr = &cma->ranges[r]; |
125 | base_pfn = cmr->base_pfn; |
126 | |
127 | /* |
128 | * alloc_contig_range() requires the pfn range specified |
129 | * to be in the same zone. Simplify by forcing the entire |
130 | * CMA resv range to be in the same zone. |
131 | */ |
132 | WARN_ON_ONCE(!pfn_valid(base_pfn)); |
133 | if (pfn_range_intersects_zones(cma->nid, base_pfn, cmr->count)) { |
134 | set_bit(CMA_ZONES_INVALID, &cma->flags); |
135 | return false; |
136 | } |
137 | } |
138 | |
139 | set_bit(CMA_ZONES_VALID, &cma->flags); |
140 | |
141 | return true; |
142 | } |
143 | |
144 | static void __init cma_activate_area(struct cma *cma) |
145 | { |
146 | unsigned long pfn, end_pfn, early_pfn[CMA_MAX_RANGES]; |
147 | int allocrange, r; |
148 | struct cma_memrange *cmr; |
149 | unsigned long bitmap_count, count; |
150 | |
151 | for (allocrange = 0; allocrange < cma->nranges; allocrange++) { |
152 | cmr = &cma->ranges[allocrange]; |
153 | early_pfn[allocrange] = cmr->early_pfn; |
154 | cmr->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma, cmr), |
155 | GFP_KERNEL); |
156 | if (!cmr->bitmap) |
157 | goto cleanup; |
158 | } |
159 | |
160 | if (!cma_validate_zones(cma)) |
161 | goto cleanup; |
162 | |
163 | for (r = 0; r < cma->nranges; r++) { |
164 | cmr = &cma->ranges[r]; |
165 | if (early_pfn[r] != cmr->base_pfn) { |
166 | count = early_pfn[r] - cmr->base_pfn; |
167 | bitmap_count = cma_bitmap_pages_to_bits(cma, count); |
168 | bitmap_set(cmr->bitmap, 0, bitmap_count); |
169 | } |
170 | |
171 | for (pfn = early_pfn[r]; pfn < cmr->base_pfn + cmr->count; |
172 | pfn += pageblock_nr_pages) |
173 | init_cma_reserved_pageblock(pfn_to_page(pfn)); |
174 | } |
175 | |
176 | spin_lock_init(&cma->lock); |
177 | |
178 | mutex_init(&cma->alloc_mutex); |
179 | |
180 | #ifdef CONFIG_CMA_DEBUGFS |
181 | INIT_HLIST_HEAD(&cma->mem_head); |
182 | spin_lock_init(&cma->mem_head_lock); |
183 | #endif |
184 | set_bit(CMA_ACTIVATED, &cma->flags); |
185 | |
186 | return; |
187 | |
188 | cleanup: |
189 | for (r = 0; r < allocrange; r++) |
190 | bitmap_free(cma->ranges[r].bitmap); |
191 | |
192 | /* Expose all pages to the buddy, they are useless for CMA. */ |
193 | if (!test_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags)) { |
194 | for (r = 0; r < allocrange; r++) { |
195 | cmr = &cma->ranges[r]; |
196 | end_pfn = cmr->base_pfn + cmr->count; |
197 | for (pfn = early_pfn[r]; pfn < end_pfn; pfn++) |
198 | free_reserved_page(pfn_to_page(pfn)); |
199 | } |
200 | } |
201 | totalcma_pages -= cma->count; |
202 | cma->available_count = cma->count = 0; |
203 | pr_err("CMA area %s could not be activated\n", cma->name); |
204 | } |
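/*
 * Illustration of the activation path above (hypothetical numbers): if
 * cma_reserve_early() already handed out the first 2 MiB of a range
 * (512 pages with 4 KiB pages) and order_per_bit is 0, early_pfn is
 * base_pfn + 512, so the first 512 bitmap bits are set as allocated
 * before the rest of the range, starting at early_pfn, is released to
 * the page allocator pageblock by pageblock via
 * init_cma_reserved_pageblock().
 */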
205 | |
206 | static int __init cma_init_reserved_areas(void) |
207 | { |
208 | int i; |
209 | |
210 | for (i = 0; i < cma_area_count; i++) |
211 | cma_activate_area(&cma_areas[i]); |
212 | |
213 | return 0; |
214 | } |
215 | core_initcall(cma_init_reserved_areas); |
216 | |
217 | void __init cma_reserve_pages_on_error(struct cma *cma) |
218 | { |
219 | set_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags); |
220 | } |
221 | |
222 | static int __init cma_new_area(const char *name, phys_addr_t size, |
223 | unsigned int order_per_bit, |
224 | struct cma **res_cma) |
225 | { |
226 | struct cma *cma; |
227 | |
228 | if (cma_area_count == ARRAY_SIZE(cma_areas)) { |
229 | pr_err("Not enough slots for CMA reserved regions!\n"); |
230 | return -ENOSPC; |
231 | } |
232 | |
233 | /* |
234 | * Each reserved area must be initialised later, when more kernel |
235 | * subsystems (like slab allocator) are available. |
236 | */ |
237 | cma = &cma_areas[cma_area_count]; |
238 | cma_area_count++; |
239 | |
240 | if (name) |
241 | snprintf(cma->name, CMA_MAX_NAME, "%s", name); |
242 | else |
243 | snprintf(cma->name, CMA_MAX_NAME, "cma%d\n", cma_area_count); |
244 | |
245 | cma->available_count = cma->count = size >> PAGE_SHIFT; |
246 | cma->order_per_bit = order_per_bit; |
247 | *res_cma = cma; |
248 | totalcma_pages += cma->count; |
249 | |
250 | return 0; |
251 | } |
252 | |
253 | static void __init cma_drop_area(struct cma *cma) |
254 | { |
255 | totalcma_pages -= cma->count; |
256 | cma_area_count--; |
257 | } |
258 | |
259 | /** |
260 | * cma_init_reserved_mem() - create custom contiguous area from reserved memory |
261 | * @base: Base address of the reserved area |
262 | * @size: Size of the reserved area (in bytes). |
263 | * @order_per_bit: Order of pages represented by one bit on bitmap. |
264 | * @name: The name of the area. If this parameter is NULL, the name of |
265 | * the area will be set to "cmaN", where N is a running counter of |
266 | * used areas. |
267 | * @res_cma: Pointer to store the created cma region. |
268 | * |
269 | * This function creates a custom contiguous area from already reserved memory. |
270 | */ |
271 | int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, |
272 | unsigned int order_per_bit, |
273 | const char *name, |
274 | struct cma **res_cma) |
275 | { |
276 | struct cma *cma; |
277 | int ret; |
278 | |
279 | /* Sanity checks */ |
280 | if (!size || !memblock_is_region_reserved(base, size)) |
281 | return -EINVAL; |
282 | |
283 | /* |
284 | * CMA uses CMA_MIN_ALIGNMENT_BYTES as alignment requirement which |
285 | * needs pageblock_order to be initialized. Let's enforce it. |
286 | */ |
287 | if (!pageblock_order) { |
288 | pr_err("pageblock_order not yet initialized. Called during early boot?\n"); |
289 | return -EINVAL; |
290 | } |
291 | |
292 | /* ensure minimal alignment required by mm core */ |
293 | if (!IS_ALIGNED(base | size, CMA_MIN_ALIGNMENT_BYTES)) |
294 | return -EINVAL; |
295 | |
296 | ret = cma_new_area(name, size, order_per_bit, &cma); |
297 | if (ret != 0) |
298 | return ret; |
299 | |
300 | cma->ranges[0].base_pfn = PFN_DOWN(base); |
301 | cma->ranges[0].early_pfn = PFN_DOWN(base); |
302 | cma->ranges[0].count = cma->count; |
303 | cma->nranges = 1; |
304 | cma->nid = NUMA_NO_NODE; |
305 | |
306 | *res_cma = cma; |
307 | |
308 | return 0; |
309 | } |
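/*
 * Minimal usage sketch for cma_init_reserved_mem() (hypothetical
 * caller, not part of this file): early setup code reserves a
 * pageblock-aligned region through memblock and then hands it to CMA.
 * "example_cma" and "example_cma_setup" are made-up names.
 *
 *	static struct cma *example_cma;
 *
 *	static int __init example_cma_setup(void)
 *	{
 *		phys_addr_t base;
 *
 *		base = memblock_phys_alloc(SZ_64M, CMA_MIN_ALIGNMENT_BYTES);
 *		if (!base)
 *			return -ENOMEM;
 *
 *		return cma_init_reserved_mem(base, SZ_64M, 0, "example",
 *					     &example_cma);
 *	}
 */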
310 | |
311 | /* |
312 | * Structure used while walking physical memory ranges and finding out |
313 | * which one(s) to use for a CMA area. |
314 | */ |
315 | struct cma_init_memrange { |
316 | phys_addr_t base; |
317 | phys_addr_t size; |
318 | struct list_head list; |
319 | }; |
320 | |
321 | /* |
322 | * Work array used during CMA initialization. |
323 | */ |
324 | static struct cma_init_memrange memranges[CMA_MAX_RANGES] __initdata; |
325 | |
326 | static bool __init revsizecmp(struct cma_init_memrange *mlp, |
327 | struct cma_init_memrange *mrp) |
328 | { |
329 | return mlp->size > mrp->size; |
330 | } |
331 | |
332 | static bool __init basecmp(struct cma_init_memrange *mlp, |
333 | struct cma_init_memrange *mrp) |
334 | { |
335 | return mlp->base < mrp->base; |
336 | } |
337 | |
338 | /* |
339 | * Helper function to create sorted lists. |
340 | */ |
341 | static void __init list_insert_sorted( |
342 | struct list_head *ranges, |
343 | struct cma_init_memrange *mrp, |
344 | bool (*cmp)(struct cma_init_memrange *lh, struct cma_init_memrange *rh)) |
345 | { |
346 | struct list_head *mp; |
347 | struct cma_init_memrange *mlp; |
348 | |
349 | if (list_empty(ranges)) |
350 | list_add(&mrp->list, ranges); |
351 | else { |
352 | list_for_each(mp, ranges) { |
353 | mlp = list_entry(mp, struct cma_init_memrange, list); |
354 | if (cmp(mlp, mrp)) |
355 | break; |
356 | } |
357 | __list_add(&mrp->list, mlp->list.prev, &mlp->list); |
358 | } |
359 | } |
360 | |
361 | /* |
362 | * Create CMA areas with a total size of @total_size. A normal allocation |
363 | * for one area is tried first. If that fails, the biggest memblock |
364 | * ranges above 4G are selected, and allocated bottom up. |
365 | * |
366 | * The complexity here is not great, but this function will only be |
367 | * called during boot, and the lists operated on have fewer than |
368 | * CMA_MAX_RANGES elements (default value: 8). |
369 | */ |
370 | int __init cma_declare_contiguous_multi(phys_addr_t total_size, |
371 | phys_addr_t align, unsigned int order_per_bit, |
372 | const char *name, struct cma **res_cma, int nid) |
373 | { |
374 | phys_addr_t start = 0, end; |
375 | phys_addr_t size, sizesum, sizeleft; |
376 | struct cma_init_memrange *mrp, *mlp, *failed; |
377 | struct cma_memrange *cmrp; |
378 | LIST_HEAD(ranges); |
379 | LIST_HEAD(final_ranges); |
380 | struct list_head *mp, *next; |
381 | int ret, nr = 1; |
382 | u64 i; |
383 | struct cma *cma; |
384 | |
385 | /* |
386 | * First, try it the normal way, producing just one range. |
387 | */ |
388 | ret = __cma_declare_contiguous_nid(&start, total_size, 0, align, |
389 | order_per_bit, false, name, res_cma, nid); |
390 | if (ret != -ENOMEM) |
391 | goto out; |
392 | |
393 | /* |
394 | * Couldn't find one range that fits our needs, so try multiple |
395 | * ranges. |
396 | * |
397 | * No need to do the alignment checks here, the call to |
398 | * __cma_declare_contiguous_nid above would have caught |
399 | * any issues. With the checks, we know that: |
400 | * |
401 | * - @align is a power of 2 |
402 | * - @align is >= pageblock alignment |
403 | * - @size is aligned to @align and to @order_per_bit |
404 | * |
405 | * So, as long as we create ranges that have a base |
406 | * aligned to @align, and a size that is aligned to |
407 | * both @align and @order_per_bit, things will work out. |
408 | */ |
409 | nr = 0; |
410 | sizesum = 0; |
411 | failed = NULL; |
412 | |
413 | ret = cma_new_area(name, total_size, order_per_bit, &cma); |
414 | if (ret != 0) |
415 | goto out; |
416 | |
417 | align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES); |
418 | /* |
419 | * Create a list of ranges above 4G, largest range first. |
420 | */ |
421 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) { |
422 | if (upper_32_bits(start) == 0) |
423 | continue; |
424 | |
425 | start = ALIGN(start, align); |
426 | if (start >= end) |
427 | continue; |
428 | |
429 | end = ALIGN_DOWN(end, align); |
430 | if (end <= start) |
431 | continue; |
432 | |
433 | size = end - start; |
434 | size = ALIGN_DOWN(size, (PAGE_SIZE << order_per_bit)); |
435 | if (!size) |
436 | continue; |
437 | sizesum += size; |
438 | |
439 | pr_debug("consider %016llx - %016llx\n", (u64)start, (u64)end); |
440 | |
441 | /* |
442 | * If we haven't yet used the maximum number of |
443 | * areas, grab a new one. |
444 | * |
445 | * Otherwise, see whether this range is at least as large as |
446 | * the smallest one recorded so far. If it is, drop the |
447 | * smallest element and re-use its slot. |
448 | */ |
449 | if (nr < CMA_MAX_RANGES) |
450 | mrp = &memranges[nr++]; |
451 | else { |
452 | mrp = list_last_entry(&ranges, |
453 | struct cma_init_memrange, list); |
454 | if (size < mrp->size) |
455 | continue; |
456 | list_del(&mrp->list); |
457 | sizesum -= mrp->size; |
458 | pr_debug("deleted %016llx - %016llx from the list\n", |
459 | (u64)mrp->base, (u64)mrp->base + size); |
460 | } |
461 | mrp->base = start; |
462 | mrp->size = size; |
463 | |
464 | /* |
465 | * Now do a sorted insert. |
466 | */ |
467 | list_insert_sorted(&ranges, mrp, revsizecmp); |
468 | pr_debug("added %016llx - %016llx to the list\n", |
469 | (u64)mrp->base, (u64)mrp->base + size); |
470 | pr_debug("total size now %llu\n", (u64)sizesum); |
471 | } |
472 | |
473 | /* |
474 | * There is not enough room in the CMA_MAX_RANGES largest |
475 | * ranges, so bail out. |
476 | */ |
477 | if (sizesum < total_size) { |
478 | cma_drop_area(cma); |
479 | ret = -ENOMEM; |
480 | goto out; |
481 | } |
482 | |
483 | /* |
484 | * Found ranges that provide enough combined space. |
485 | * Now sort them by address, smallest first, because we |
486 | * want to mimic a bottom-up memblock allocation. |
487 | */ |
488 | sizesum = 0; |
489 | list_for_each_safe(mp, next, &ranges) { |
490 | mlp = list_entry(mp, struct cma_init_memrange, list); |
491 | list_del(mp); |
492 | list_insert_sorted(&final_ranges, mlp, basecmp); |
493 | sizesum += mlp->size; |
494 | if (sizesum >= total_size) |
495 | break; |
496 | } |
497 | |
498 | /* |
499 | * Walk the final list, and add a CMA range for |
500 | * each range, possibly not using the last one fully. |
501 | */ |
502 | nr = 0; |
503 | sizeleft = total_size; |
504 | list_for_each(mp, &final_ranges) { |
505 | mlp = list_entry(mp, struct cma_init_memrange, list); |
506 | size = min(sizeleft, mlp->size); |
507 | if (memblock_reserve(mlp->base, size)) { |
508 | /* |
509 | * Unexpected error. Could go on to |
510 | * the next one, but just abort to |
511 | * be safe. |
512 | */ |
513 | failed = mlp; |
514 | break; |
515 | } |
516 | |
517 | pr_debug("created region %d: %016llx - %016llx\n", |
518 | nr, (u64)mlp->base, (u64)mlp->base + size); |
519 | cmrp = &cma->ranges[nr++]; |
520 | cmrp->base_pfn = PHYS_PFN(mlp->base); |
521 | cmrp->early_pfn = cmrp->base_pfn; |
522 | cmrp->count = size >> PAGE_SHIFT; |
523 | |
524 | sizeleft -= size; |
525 | if (sizeleft == 0) |
526 | break; |
527 | } |
528 | |
529 | if (failed) { |
530 | list_for_each(mp, &final_ranges) { |
531 | mlp = list_entry(mp, struct cma_init_memrange, list); |
532 | if (mlp == failed) |
533 | break; |
534 | memblock_phys_free(mlp->base, mlp->size); |
535 | } |
536 | cma_drop_area(cma); |
537 | ret = -ENOMEM; |
538 | goto out; |
539 | } |
540 | |
541 | cma->nranges = nr; |
542 | cma->nid = nid; |
543 | *res_cma = cma; |
544 | |
545 | out: |
546 | if (ret != 0) |
547 | pr_err("Failed to reserve %lu MiB\n", |
548 | (unsigned long)total_size / SZ_1M); |
549 | else |
550 | pr_info("Reserved %lu MiB in %d range%s\n", |
551 | (unsigned long)total_size / SZ_1M, nr, |
552 | nr > 1 ? "s": ""); |
553 | return ret; |
554 | } |
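/*
 * Usage sketch for cma_declare_contiguous_multi() (hypothetical caller
 * and sizes): reserving a large pool that may be satisfied from up to
 * CMA_MAX_RANGES physical ranges above 4G if a single contiguous
 * reservation fails.
 *
 *	static struct cma *big_pool;
 *
 *	void __init reserve_big_pool(void)
 *	{
 *		if (cma_declare_contiguous_multi(SZ_2G, 0, 0, "big_pool",
 *						 &big_pool, NUMA_NO_NODE))
 *			pr_warn("big_pool: CMA reservation failed\n");
 *	}
 */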
555 | |
556 | /** |
557 | * cma_declare_contiguous_nid() - reserve custom contiguous area |
558 | * @base: Base address of the reserved area (optional, use 0 for any) |
559 | * @size: Size of the reserved area (in bytes). |
560 | * @limit: End address of the reserved memory (optional, 0 for any). |
561 | * @alignment: Alignment for the CMA area, should be power of 2 or zero |
562 | * @order_per_bit: Order of pages represented by one bit on bitmap. |
563 | * @fixed: hint about where to place the reserved area |
564 | * @name: The name of the area. See function cma_init_reserved_mem() |
565 | * @res_cma: Pointer to store the created cma region. |
566 | * @nid: nid of the free area to find, %NUMA_NO_NODE for any node |
567 | * |
568 | * This function reserves memory from the early allocator. It should be |
569 | * called by arch-specific code once the early allocator (memblock or bootmem) |
570 | * has been activated and all other subsystems have already allocated/reserved |
571 | * memory. This function allows the creation of custom reserved areas. |
572 | * |
573 | * If @fixed is true, reserve contiguous area at exactly @base. If false, |
574 | * reserve in range from @base to @limit. |
575 | */ |
576 | int __init cma_declare_contiguous_nid(phys_addr_t base, |
577 | phys_addr_t size, phys_addr_t limit, |
578 | phys_addr_t alignment, unsigned int order_per_bit, |
579 | bool fixed, const char *name, struct cma **res_cma, |
580 | int nid) |
581 | { |
582 | int ret; |
583 | |
584 | ret = __cma_declare_contiguous_nid(&base, size, limit, alignment, |
585 | order_per_bit, fixed, name, res_cma, nid); |
586 | if (ret != 0) |
587 | pr_err("Failed to reserve %ld MiB\n", |
588 | (unsigned long)size / SZ_1M); |
589 | else |
590 | pr_info("Reserved %ld MiB at %pa\n", |
591 | (unsigned long)size / SZ_1M, &base); |
592 | |
593 | return ret; |
594 | } |
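/*
 * Usage sketch for cma_declare_contiguous_nid() (hypothetical caller,
 * illustrative values): reserve 64 MiB anywhere below 4 GiB, on any
 * node, without a fixed base address.
 *
 *	static struct cma *dma_pool;
 *
 *	void __init reserve_dma_pool(void)
 *	{
 *		if (cma_declare_contiguous_nid(0, SZ_64M, SZ_4G, 0, 0, false,
 *					       "dma_pool", &dma_pool,
 *					       NUMA_NO_NODE))
 *			pr_warn("dma_pool: CMA reservation failed\n");
 *	}
 */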
595 | |
596 | static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, |
597 | phys_addr_t size, phys_addr_t limit, |
598 | phys_addr_t alignment, unsigned int order_per_bit, |
599 | bool fixed, const char *name, struct cma **res_cma, |
600 | int nid) |
601 | { |
602 | phys_addr_t memblock_end = memblock_end_of_DRAM(); |
603 | phys_addr_t highmem_start, base = *basep; |
604 | int ret; |
605 | |
606 | /* |
607 | * We can't use __pa(high_memory) directly, since high_memory |
608 | * isn't a valid direct map VA, and DEBUG_VIRTUAL will (validly) |
609 | * complain. Find the boundary by adding one to the last valid |
610 | * address. |
611 | */ |
612 | if (IS_ENABLED(CONFIG_HIGHMEM)) |
613 | highmem_start = __pa(high_memory - 1) + 1; |
614 | else |
615 | highmem_start = memblock_end_of_DRAM(); |
616 | pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n", |
617 | __func__, &size, &base, &limit, &alignment); |
618 | |
619 | if (cma_area_count == ARRAY_SIZE(cma_areas)) { |
620 | pr_err("Not enough slots for CMA reserved regions!\n"); |
621 | return -ENOSPC; |
622 | } |
623 | |
624 | if (!size) |
625 | return -EINVAL; |
626 | |
627 | if (alignment && !is_power_of_2(alignment)) |
628 | return -EINVAL; |
629 | |
630 | if (!IS_ENABLED(CONFIG_NUMA)) |
631 | nid = NUMA_NO_NODE; |
632 | |
633 | /* Sanitise input arguments. */ |
634 | alignment = max_t(phys_addr_t, alignment, CMA_MIN_ALIGNMENT_BYTES); |
635 | if (fixed && base & (alignment - 1)) { |
636 | pr_err("Region at %pa must be aligned to %pa bytes\n", |
637 | &base, &alignment); |
638 | return -EINVAL; |
639 | } |
640 | base = ALIGN(base, alignment); |
641 | size = ALIGN(size, alignment); |
642 | limit &= ~(alignment - 1); |
643 | |
644 | if (!base) |
645 | fixed = false; |
646 | |
647 | /* size should be aligned with order_per_bit */ |
648 | if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) |
649 | return -EINVAL; |
650 | |
651 | /* |
652 | * If allocating at a fixed base the requested region must not cross the |
653 | * low/high memory boundary. |
654 | */ |
655 | if (fixed && base < highmem_start && base + size > highmem_start) { |
656 | pr_err("Region at %pa defined on low/high memory boundary (%pa)\n", |
657 | &base, &highmem_start); |
658 | return -EINVAL; |
659 | } |
660 | |
661 | /* |
662 | * If the limit is unspecified or above the memblock end, its effective |
663 | * value will be the memblock end. Set it explicitly to simplify further |
664 | * checks. |
665 | */ |
666 | if (limit == 0 || limit > memblock_end) |
667 | limit = memblock_end; |
668 | |
669 | if (base + size > limit) { |
670 | pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n", |
671 | &size, &base, &limit); |
672 | return -EINVAL; |
673 | } |
674 | |
675 | /* Reserve memory */ |
676 | if (fixed) { |
677 | if (memblock_is_region_reserved(base, size) || |
678 | memblock_reserve(base, size) < 0) { |
679 | return -EBUSY; |
680 | } |
681 | } else { |
682 | phys_addr_t addr = 0; |
683 | |
684 | /* |
685 | * If there is enough memory, try a bottom-up allocation first. |
686 | * It will place the new cma area close to the start of the node |
687 | * and guarantee that the compaction is moving pages out of the |
688 | * cma area and not into it. |
689 | * Avoid using the first 4GB so as not to interfere with constrained zones |
690 | * like DMA/DMA32. |
691 | */ |
692 | #ifdef CONFIG_PHYS_ADDR_T_64BIT |
693 | if (!memblock_bottom_up() && memblock_end >= SZ_4G + size) { |
694 | memblock_set_bottom_up(true); |
695 | addr = memblock_alloc_range_nid(size, alignment, SZ_4G, |
696 | limit, nid, true); |
697 | memblock_set_bottom_up(false); |
698 | } |
699 | #endif |
700 | |
701 | /* |
702 | * All pages in the reserved area must come from the same zone. |
703 | * If the requested region crosses the low/high memory boundary, |
704 | * try allocating from high memory first and fall back to low |
705 | * memory in case of failure. |
706 | */ |
707 | if (!addr && base < highmem_start && limit > highmem_start) { |
708 | addr = memblock_alloc_range_nid(size, alignment, |
709 | highmem_start, limit, nid, true); |
710 | limit = highmem_start; |
711 | } |
712 | |
713 | if (!addr) { |
714 | addr = memblock_alloc_range_nid(size, alignment, base, |
715 | limit, nid, true); |
716 | if (!addr) |
717 | return -ENOMEM; |
718 | } |
719 | |
720 | /* |
721 | * kmemleak scans/reads tracked objects for pointers to other |
722 | * objects but this address isn't mapped and accessible |
723 | */ |
724 | kmemleak_ignore_phys(addr); |
725 | base = addr; |
726 | } |
727 | |
728 | ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma); |
729 | if (ret) { |
730 | memblock_phys_free(base, size); |
731 | return ret; |
732 | } |
733 | |
734 | (*res_cma)->nid = nid; |
735 | *basep = base; |
736 | |
737 | return 0; |
738 | } |
739 | |
740 | static void cma_debug_show_areas(struct cma *cma) |
741 | { |
742 | unsigned long next_zero_bit, next_set_bit, nr_zero; |
743 | unsigned long start; |
744 | unsigned long nr_part; |
745 | unsigned long nbits; |
746 | int r; |
747 | struct cma_memrange *cmr; |
748 | |
749 | spin_lock_irq(&cma->lock); |
750 | pr_info("number of available pages: "); |
751 | for (r = 0; r < cma->nranges; r++) { |
752 | cmr = &cma->ranges[r]; |
753 | |
754 | start = 0; |
755 | nbits = cma_bitmap_maxno(cma, cmr); |
756 | |
757 | pr_info("range %d: ", r); |
758 | for (;;) { |
759 | next_zero_bit = find_next_zero_bit(cmr->bitmap, |
760 | nbits, start); |
761 | if (next_zero_bit >= nbits) |
762 | break; |
763 | next_set_bit = find_next_bit(cmr->bitmap, nbits, |
764 | next_zero_bit); |
765 | nr_zero = next_set_bit - next_zero_bit; |
766 | nr_part = nr_zero << cma->order_per_bit; |
767 | pr_cont("%s%lu@%lu", start ? "+": "", nr_part, |
768 | next_zero_bit); |
769 | start = next_zero_bit + nr_zero; |
770 | } |
771 | pr_info("\n"); |
772 | } |
773 | pr_cont("=> %lu free of %lu total pages\n", cma->available_count, |
774 | cma->count); |
775 | spin_unlock_irq(&cma->lock); |
776 | } |
777 | |
778 | static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr, |
779 | unsigned long count, unsigned int align, |
780 | struct page **pagep, gfp_t gfp) |
781 | { |
782 | unsigned long mask, offset; |
783 | unsigned long pfn = -1; |
784 | unsigned long start = 0; |
785 | unsigned long bitmap_maxno, bitmap_no, bitmap_count; |
786 | int ret = -EBUSY; |
787 | struct page *page = NULL; |
788 | |
789 | mask = cma_bitmap_aligned_mask(cma, align); |
790 | offset = cma_bitmap_aligned_offset(cma, cmr, align); |
791 | bitmap_maxno = cma_bitmap_maxno(cma, cmr); |
792 | bitmap_count = cma_bitmap_pages_to_bits(cma, count); |
793 | |
794 | if (bitmap_count > bitmap_maxno) |
795 | goto out; |
796 | |
797 | for (;;) { |
798 | spin_lock_irq(&cma->lock); |
799 | /* |
800 | * If the request is larger than the available number |
801 | * of pages, stop right away. |
802 | */ |
803 | if (count > cma->available_count) { |
804 | spin_unlock_irq(&cma->lock); |
805 | break; |
806 | } |
807 | bitmap_no = bitmap_find_next_zero_area_off(cmr->bitmap, |
808 | bitmap_maxno, start, bitmap_count, mask, |
809 | offset); |
810 | if (bitmap_no >= bitmap_maxno) { |
811 | spin_unlock_irq(&cma->lock); |
812 | break; |
813 | } |
814 | bitmap_set(cmr->bitmap, bitmap_no, bitmap_count); |
815 | cma->available_count -= count; |
816 | /* |
817 | * It's safe to drop the lock here. We've marked this region for |
818 | * our exclusive use. If the migration fails we will take the |
819 | * lock again and unmark it. |
820 | */ |
821 | spin_unlock_irq(&cma->lock); |
822 | |
823 | pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit); |
824 | mutex_lock(&cma->alloc_mutex); |
825 | ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, gfp); |
826 | mutex_unlock(&cma->alloc_mutex); |
827 | if (ret == 0) { |
828 | page = pfn_to_page(pfn); |
829 | break; |
830 | } |
831 | |
832 | cma_clear_bitmap(cma, cmr, pfn, count); |
833 | if (ret != -EBUSY) |
834 | break; |
835 | |
836 | pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n", |
837 | __func__, pfn, pfn_to_page(pfn)); |
838 | |
839 | trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn), |
840 | count, align); |
841 | /* try again with a bit different memory target */ |
842 | start = bitmap_no + mask + 1; |
843 | } |
844 | out: |
845 | *pagep = page; |
846 | return ret; |
847 | } |
848 | |
849 | static struct page *__cma_alloc(struct cma *cma, unsigned long count, |
850 | unsigned int align, gfp_t gfp) |
851 | { |
852 | struct page *page = NULL; |
853 | int ret = -ENOMEM, r; |
854 | unsigned long i; |
855 | const char *name = cma ? cma->name : NULL; |
856 | |
857 | trace_cma_alloc_start(name, count, align); |
858 | |
859 | if (!cma || !cma->count) |
860 | return page; |
861 | |
862 | pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__, |
863 | (void *)cma, cma->name, count, align); |
864 | |
865 | if (!count) |
866 | return page; |
867 | |
868 | for (r = 0; r < cma->nranges; r++) { |
869 | page = NULL; |
870 | |
871 | ret = cma_range_alloc(cma, &cma->ranges[r], count, align, |
872 | &page, gfp); |
873 | if (ret != -EBUSY || page) |
874 | break; |
875 | } |
876 | |
877 | /* |
878 | * CMA can allocate multiple page blocks, which results in different |
879 | * blocks being marked with different tags. Reset the tags to ignore |
880 | * those page blocks. |
881 | */ |
882 | if (page) { |
883 | for (i = 0; i < count; i++) |
884 | page_kasan_tag_reset(nth_page(page, i)); |
885 | } |
886 | |
887 | if (ret && !(gfp & __GFP_NOWARN)) { |
888 | pr_err_ratelimited("%s: %s: alloc failed, req-size: %lu pages, ret: %d\n", |
889 | __func__, cma->name, count, ret); |
890 | cma_debug_show_areas(cma); |
891 | } |
892 | |
893 | pr_debug("%s(): returned %p\n", __func__, page); |
894 | trace_cma_alloc_finish(name, page ? page_to_pfn(page) : 0, |
895 | page, count, align, ret); |
896 | if (page) { |
897 | count_vm_event(CMA_ALLOC_SUCCESS); |
898 | cma_sysfs_account_success_pages(cma, count); |
899 | } else { |
900 | count_vm_event(CMA_ALLOC_FAIL); |
901 | cma_sysfs_account_fail_pages(cma, count); |
902 | } |
903 | |
904 | return page; |
905 | } |
906 | |
907 | /** |
908 | * cma_alloc() - allocate pages from contiguous area |
909 | * @cma: Contiguous memory region for which the allocation is performed. |
910 | * @count: Requested number of pages. |
911 | * @align: Requested alignment of pages (in PAGE_SIZE order). |
912 | * @no_warn: Avoid printing message about failed allocation |
913 | * |
914 | * This function allocates pages from the specified contiguous |
915 | * memory area. |
916 | */ |
917 | struct page *cma_alloc(struct cma *cma, unsigned long count, |
918 | unsigned int align, bool no_warn) |
919 | { |
920 | return __cma_alloc(cma, count, align, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0)); |
921 | } |
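/*
 * Usage sketch for cma_alloc()/cma_release() (hypothetical driver
 * code; "my_cma" is a made-up pointer obtained from one of the
 * declaration helpers above): allocate and later free a 1 MiB buffer.
 *
 *	unsigned long nr_pages = SZ_1M >> PAGE_SHIFT;
 *	struct page *page;
 *
 *	page = cma_alloc(my_cma, nr_pages, 0, false);
 *	if (!page)
 *		return -ENOMEM;
 *	...
 *	cma_release(my_cma, page, nr_pages);
 */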
922 | |
923 | struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp) |
924 | { |
925 | struct page *page; |
926 | |
927 | if (WARN_ON(!order || !(gfp & __GFP_COMP))) |
928 | return NULL; |
929 | |
930 | page = __cma_alloc(cma, 1 << order, order, gfp); |
931 | |
932 | return page ? page_folio(page) : NULL; |
933 | } |
934 | |
935 | bool cma_pages_valid(struct cma *cma, const struct page *pages, |
936 | unsigned long count) |
937 | { |
938 | unsigned long pfn, end; |
939 | int r; |
940 | struct cma_memrange *cmr; |
941 | bool ret; |
942 | |
943 | if (!cma || !pages || count > cma->count) |
944 | return false; |
945 | |
946 | pfn = page_to_pfn(pages); |
947 | ret = false; |
948 | |
949 | for (r = 0; r < cma->nranges; r++) { |
950 | cmr = &cma->ranges[r]; |
951 | end = cmr->base_pfn + cmr->count; |
952 | if (pfn >= cmr->base_pfn && pfn < end) { |
953 | ret = pfn + count <= end; |
954 | break; |
955 | } |
956 | } |
957 | |
958 | if (!ret) |
959 | pr_debug("%s(page %p, count %lu)\n", |
960 | __func__, (void *)pages, count); |
961 | |
962 | return ret; |
963 | } |
964 | |
965 | /** |
966 | * cma_release() - release allocated pages |
967 | * @cma: Contiguous memory region for which the allocation is performed. |
968 | * @pages: Allocated pages. |
969 | * @count: Number of allocated pages. |
970 | * |
971 | * This function releases memory allocated by cma_alloc(). |
972 | * It returns false when the provided pages do not belong to the |
973 | * contiguous area, and true otherwise. |
974 | */ |
975 | bool cma_release(struct cma *cma, const struct page *pages, |
976 | unsigned long count) |
977 | { |
978 | struct cma_memrange *cmr; |
979 | unsigned long pfn, end_pfn; |
980 | int r; |
981 | |
982 | pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count); |
983 | |
984 | if (!cma_pages_valid(cma, pages, count)) |
985 | return false; |
986 | |
987 | pfn = page_to_pfn(pages); |
988 | end_pfn = pfn + count; |
989 | |
990 | for (r = 0; r < cma->nranges; r++) { |
991 | cmr = &cma->ranges[r]; |
992 | if (pfn >= cmr->base_pfn && |
993 | pfn < (cmr->base_pfn + cmr->count)) { |
994 | VM_BUG_ON(end_pfn > cmr->base_pfn + cmr->count); |
995 | break; |
996 | } |
997 | } |
998 | |
999 | if (r == cma->nranges) |
1000 | return false; |
1001 | |
1002 | free_contig_range(pfn, count); |
1003 | cma_clear_bitmap(cma, cmr, pfn, count); |
1004 | cma_sysfs_account_release_pages(cma, count); |
1005 | trace_cma_release(cma->name, pfn, pages, count); |
1006 | |
1007 | return true; |
1008 | } |
1009 | |
1010 | bool cma_free_folio(struct cma *cma, const struct folio *folio) |
1011 | { |
1012 | if (WARN_ON(!folio_test_large(folio))) |
1013 | return false; |
1014 | |
1015 | return cma_release(cma, &folio->page, folio_nr_pages(folio)); |
1016 | } |
1017 | |
1018 | int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data) |
1019 | { |
1020 | int i; |
1021 | |
1022 | for (i = 0; i < cma_area_count; i++) { |
1023 | int ret = it(&cma_areas[i], data); |
1024 | |
1025 | if (ret) |
1026 | return ret; |
1027 | } |
1028 | |
1029 | return 0; |
1030 | } |
1031 | |
1032 | bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end) |
1033 | { |
1034 | int r; |
1035 | struct cma_memrange *cmr; |
1036 | unsigned long rstart, rend; |
1037 | |
1038 | for (r = 0; r < cma->nranges; r++) { |
1039 | cmr = &cma->ranges[r]; |
1040 | |
1041 | rstart = PFN_PHYS(cmr->base_pfn); |
1042 | rend = PFN_PHYS(cmr->base_pfn + cmr->count); |
1043 | if (end < rstart) |
1044 | continue; |
1045 | if (start >= rend) |
1046 | continue; |
1047 | return true; |
1048 | } |
1049 | |
1050 | return false; |
1051 | } |
1052 | |
1053 | /* |
1054 | * Very basic function to reserve memory from a CMA area that has not |
1055 | * yet been activated. This is expected to be called early, when the |
1056 | * system is single-threaded, so there is no locking. The alignment |
1057 | * checking is restrictive - only pageblock-aligned areas |
1058 | * (CMA_MIN_ALIGNMENT_BYTES) may be reserved through this function. |
1059 | * This keeps things simple, and is enough for the current use case. |
1060 | * |
1061 | * The CMA bitmaps have not yet been allocated, so just start |
1062 | * reserving from the bottom up, using a PFN to keep track |
1063 | * of what has been reserved. Unreserving is not possible. |
1064 | * |
1065 | * The caller is responsible for initializing the page structures |
1066 | * in the area properly, since this just points to memblock-allocated |
1067 | * memory. The caller should subsequently use init_cma_pageblock to |
1068 | * set the migrate type and CMA stats for the pageblocks that were reserved. |
1069 | * |
1070 | * If the CMA area fails to activate later, memory obtained through |
1071 | * this interface is not handed to the page allocator, this is |
1072 | * the responsibility of the caller (e.g. like normal memblock-allocated |
1073 | * memory). |
1074 | */ |
1075 | void __init *cma_reserve_early(struct cma *cma, unsigned long size) |
1076 | { |
1077 | int r; |
1078 | struct cma_memrange *cmr; |
1079 | unsigned long available; |
1080 | void *ret = NULL; |
1081 | |
1082 | if (!cma || !cma->count) |
1083 | return NULL; |
1084 | /* |
1085 | * Can only be called early in init. |
1086 | */ |
1087 | if (test_bit(CMA_ACTIVATED, &cma->flags)) |
1088 | return NULL; |
1089 | |
1090 | if (!IS_ALIGNED(size, CMA_MIN_ALIGNMENT_BYTES)) |
1091 | return NULL; |
1092 | |
1093 | if (!IS_ALIGNED(size, (PAGE_SIZE << cma->order_per_bit))) |
1094 | return NULL; |
1095 | |
1096 | size >>= PAGE_SHIFT; |
1097 | |
1098 | if (size > cma->available_count) |
1099 | return NULL; |
1100 | |
1101 | for (r = 0; r < cma->nranges; r++) { |
1102 | cmr = &cma->ranges[r]; |
1103 | available = cmr->count - (cmr->early_pfn - cmr->base_pfn); |
1104 | if (size <= available) { |
1105 | ret = phys_to_virt(PFN_PHYS(cmr->early_pfn)); |
1106 | cmr->early_pfn += size; |
1107 | cma->available_count -= size; |
1108 | return ret; |
1109 | } |
1110 | } |
1111 | |
1112 | return ret; |
1113 | } |
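/*
 * Usage sketch for cma_reserve_early() (hypothetical caller,
 * illustrative size): carve a pageblock-aligned chunk out of a CMA
 * area that has not been activated yet, e.g. for boot-time metadata.
 * As noted above, the caller must initialize the page structures for
 * the reserved pageblocks itself (via init_cma_pageblock()).
 *
 *	void *buf = cma_reserve_early(my_cma, SZ_4M);
 *
 *	if (!buf)
 *		pr_warn("early CMA reservation failed\n");
 */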
1114 |