1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Contiguous Memory Allocator |
4 | * |
5 | * Copyright (c) 2010-2011 by Samsung Electronics. |
6 | * Copyright IBM Corporation, 2013 |
7 | * Copyright LG Electronics Inc., 2014 |
8 | * Written by: |
9 | * Marek Szyprowski <m.szyprowski@samsung.com> |
10 | * Michal Nazarewicz <mina86@mina86.com> |
11 | * Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> |
12 | * Joonsoo Kim <iamjoonsoo.kim@lge.com> |
13 | */ |
14 | |
15 | #define pr_fmt(fmt) "cma: " fmt |
16 | |
17 | #define CREATE_TRACE_POINTS |
18 | |
19 | #include <linux/memblock.h> |
20 | #include <linux/err.h> |
21 | #include <linux/list.h> |
22 | #include <linux/mm.h> |
23 | #include <linux/sizes.h> |
24 | #include <linux/slab.h> |
25 | #include <linux/log2.h> |
26 | #include <linux/cma.h> |
27 | #include <linux/highmem.h> |
28 | #include <linux/io.h> |
29 | #include <linux/kmemleak.h> |
30 | #include <trace/events/cma.h> |
31 | |
32 | #include "internal.h" |
33 | #include "cma.h" |
34 | |
35 | struct cma cma_areas[MAX_CMA_AREAS]; |
36 | unsigned int cma_area_count; |
37 | |
38 | static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, |
39 | phys_addr_t size, phys_addr_t limit, |
40 | phys_addr_t alignment, unsigned int order_per_bit, |
41 | bool fixed, const char *name, struct cma **res_cma, |
42 | int nid); |
43 | |
44 | phys_addr_t cma_get_base(const struct cma *cma) |
45 | { |
46 | WARN_ON_ONCE(cma->nranges != 1); |
47 | return PFN_PHYS(cma->ranges[0].base_pfn); |
48 | } |
49 | |
50 | unsigned long cma_get_size(const struct cma *cma) |
51 | { |
52 | return cma->count << PAGE_SHIFT; |
53 | } |
54 | |
55 | const char *cma_get_name(const struct cma *cma) |
56 | { |
57 | return cma->name; |
58 | } |
59 | |
60 | static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, |
61 | unsigned int align_order) |
62 | { |
63 | if (align_order <= cma->order_per_bit) |
64 | return 0; |
65 | return (1UL << (align_order - cma->order_per_bit)) - 1; |
66 | } |
67 | |
68 | /* |
69 | * Find the offset of the base PFN from the specified align_order. |
70 | * The value returned is represented in order_per_bits. |
71 | */ |
72 | static unsigned long cma_bitmap_aligned_offset(const struct cma *cma, |
73 | const struct cma_memrange *cmr, |
74 | unsigned int align_order) |
75 | { |
76 | return (cmr->base_pfn & ((1UL << align_order) - 1)) |
77 | >> cma->order_per_bit; |
78 | } |
79 | |
80 | static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma, |
81 | unsigned long pages) |
82 | { |
83 | return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; |
84 | } |
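/*
 * Worked example for the three bitmap helpers above (illustrative
 * numbers only, not taken from any particular configuration): with
 * order_per_bit == 0 and align_order == 2, cma_bitmap_aligned_mask()
 * returns (1 << 2) - 1 == 3, forcing bitmap searches onto 4-page
 * boundaries. With base_pfn == 0x12345 and align_order == 8,
 * cma_bitmap_aligned_offset() returns 0x12345 & 0xff == 0x45, the
 * extra offset needed because the range itself does not start on a
 * 256-page boundary. With order_per_bit == 1,
 * cma_bitmap_pages_to_bits(cma, 5) rounds 5 pages up to 6 and
 * returns 3 bits.
 */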
85 | |
86 | static void cma_clear_bitmap(struct cma *cma, const struct cma_memrange *cmr, |
87 | unsigned long pfn, unsigned long count) |
88 | { |
89 | unsigned long bitmap_no, bitmap_count; |
90 | unsigned long flags; |
91 | |
92 | bitmap_no = (pfn - cmr->base_pfn) >> cma->order_per_bit; |
93 | bitmap_count = cma_bitmap_pages_to_bits(cma, count); |
94 | |
95 | spin_lock_irqsave(&cma->lock, flags); |
96 | bitmap_clear(cmr->bitmap, bitmap_no, bitmap_count); |
97 | cma->available_count += count; |
98 | spin_unlock_irqrestore(&cma->lock, flags); |
99 | } |
100 | |
101 | /* |
102 | * Check if a CMA area contains no ranges that intersect with |
103 | * multiple zones. Store the result in the flags in case |
104 | * this gets called more than once. |
105 | */ |
106 | bool cma_validate_zones(struct cma *cma) |
107 | { |
108 | int r; |
109 | unsigned long base_pfn; |
110 | struct cma_memrange *cmr; |
111 | bool valid_bit_set; |
112 | |
113 | /* |
114 | * If already validated, return result of previous check. |
115 | * Either the valid or invalid bit will be set if this |
116 | * check has already been done. If neither is set, the |
117 | * check has not been performed yet. |
118 | */ |
119 | valid_bit_set = test_bit(CMA_ZONES_VALID, &cma->flags); |
120 | if (valid_bit_set || test_bit(CMA_ZONES_INVALID, &cma->flags)) |
121 | return valid_bit_set; |
122 | |
123 | for (r = 0; r < cma->nranges; r++) { |
124 | cmr = &cma->ranges[r]; |
125 | base_pfn = cmr->base_pfn; |
126 | |
127 | /* |
128 | * alloc_contig_range() requires the pfn range specified |
129 | * to be in the same zone. Simplify by forcing the entire |
130 | * CMA resv range to be in the same zone. |
131 | */ |
132 | WARN_ON_ONCE(!pfn_valid(base_pfn)); |
133 | if (pfn_range_intersects_zones(cma->nid, base_pfn, cmr->count)) { |
134 | set_bit(CMA_ZONES_INVALID, &cma->flags); |
135 | return false; |
136 | } |
137 | } |
138 | |
139 | set_bit(CMA_ZONES_VALID, &cma->flags); |
140 | |
141 | return true; |
142 | } |
143 | |
144 | static void __init cma_activate_area(struct cma *cma) |
145 | { |
146 | unsigned long pfn, end_pfn, early_pfn[CMA_MAX_RANGES]; |
147 | int allocrange, r; |
148 | struct cma_memrange *cmr; |
149 | unsigned long bitmap_count, count; |
150 | |
151 | for (allocrange = 0; allocrange < cma->nranges; allocrange++) { |
152 | cmr = &cma->ranges[allocrange]; |
153 | early_pfn[allocrange] = cmr->early_pfn; |
154 | cmr->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma, cmr), |
155 | GFP_KERNEL); |
156 | if (!cmr->bitmap) |
157 | goto cleanup; |
158 | } |
159 | |
160 | if (!cma_validate_zones(cma)) |
161 | goto cleanup; |
162 | |
163 | for (r = 0; r < cma->nranges; r++) { |
164 | cmr = &cma->ranges[r]; |
165 | if (early_pfn[r] != cmr->base_pfn) { |
166 | count = early_pfn[r] - cmr->base_pfn; |
167 | bitmap_count = cma_bitmap_pages_to_bits(cma, count); |
168 | bitmap_set(cmr->bitmap, 0, bitmap_count); |
169 | } |
170 | |
171 | for (pfn = early_pfn[r]; pfn < cmr->base_pfn + cmr->count; |
172 | pfn += pageblock_nr_pages) |
173 | init_cma_reserved_pageblock(pfn_to_page(pfn)); |
174 | } |
175 | |
176 | spin_lock_init(&cma->lock); |
177 | |
178 | mutex_init(&cma->alloc_mutex); |
179 | |
180 | #ifdef CONFIG_CMA_DEBUGFS |
181 | INIT_HLIST_HEAD(&cma->mem_head); |
182 | spin_lock_init(&cma->mem_head_lock); |
183 | #endif |
184 | set_bit(CMA_ACTIVATED, &cma->flags); |
185 | |
186 | return; |
187 | |
188 | cleanup: |
189 | for (r = 0; r < allocrange; r++) |
190 | bitmap_free(cma->ranges[r].bitmap); |
191 | |
192 | /* Expose all pages to the buddy, they are useless for CMA. */ |
193 | if (!test_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags)) { |
194 | for (r = 0; r < allocrange; r++) { |
195 | cmr = &cma->ranges[r]; |
196 | end_pfn = cmr->base_pfn + cmr->count; |
197 | for (pfn = early_pfn[r]; pfn < end_pfn; pfn++) |
198 | free_reserved_page(pfn_to_page(pfn)); |
199 | } |
200 | } |
201 | totalcma_pages -= cma->count; |
202 | cma->available_count = cma->count = 0; |
203 | pr_err("CMA area %s could not be activated\n", cma->name); |
204 | } |
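/*
 * Illustration of the activation path above (hypothetical numbers): if
 * cma_reserve_early() already handed out the first 2 MiB of a range
 * (512 pages with 4 KiB pages) and order_per_bit is 0, early_pfn is
 * base_pfn + 512, so the first 512 bitmap bits are set as allocated
 * before the rest of the range, starting at early_pfn, is released to
 * the page allocator pageblock by pageblock via
 * init_cma_reserved_pageblock().
 */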
205 | |
206 | static int __init cma_init_reserved_areas(void) |
207 | { |
208 | int i; |
209 | |
210 | for (i = 0; i < cma_area_count; i++) |
211 | cma_activate_area(&cma_areas[i]); |
212 | |
213 | return 0; |
214 | } |
215 | core_initcall(cma_init_reserved_areas); |
216 | |
217 | void __init cma_reserve_pages_on_error(struct cma *cma) |
218 | { |
219 | set_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags); |
220 | } |
221 | |
222 | static int __init cma_new_area(const char *name, phys_addr_t size, |
223 | unsigned int order_per_bit, |
224 | struct cma **res_cma) |
225 | { |
226 | struct cma *cma; |
227 | |
228 | if (cma_area_count == ARRAY_SIZE(cma_areas)) { |
229 | pr_err("Not enough slots for CMA reserved regions!\n"); |
230 | return -ENOSPC; |
231 | } |
232 | |
233 | /* |
234 | * Each reserved area must be initialised later, when more kernel |
235 | * subsystems (like slab allocator) are available. |
236 | */ |
237 | cma = &cma_areas[cma_area_count]; |
238 | cma_area_count++; |
239 | |
240 | if (name) |
241 | snprintf(cma->name, CMA_MAX_NAME, "%s", name); |
242 | else |
243 | snprintf(cma->name, CMA_MAX_NAME, "cma%d\n", cma_area_count); |
244 | |
245 | cma->available_count = cma->count = size >> PAGE_SHIFT; |
246 | cma->order_per_bit = order_per_bit; |
247 | *res_cma = cma; |
248 | totalcma_pages += cma->count; |
249 | |
250 | return 0; |
251 | } |
252 | |
253 | static void __init cma_drop_area(struct cma *cma) |
254 | { |
255 | totalcma_pages -= cma->count; |
256 | cma_area_count--; |
257 | } |
258 | |
259 | /** |
260 | * cma_init_reserved_mem() - create custom contiguous area from reserved memory |
261 | * @base: Base address of the reserved area |
262 | * @size: Size of the reserved area (in bytes). |
263 | * @order_per_bit: Order of pages represented by one bit on bitmap. |
264 | * @name: The name of the area. If this parameter is NULL, the name of |
265 | * the area will be set to "cmaN", where N is a running counter of |
266 | * used areas. |
267 | * @res_cma: Pointer to store the created cma region. |
268 | * |
269 | * This function creates a custom contiguous area from already reserved memory. |
270 | */ |
271 | int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, |
272 | unsigned int order_per_bit, |
273 | const char *name, |
274 | struct cma **res_cma) |
275 | { |
276 | struct cma *cma; |
277 | int ret; |
278 | |
279 | /* Sanity checks */ |
280 | if (!size || !memblock_is_region_reserved(base, size)) |
281 | return -EINVAL; |
282 | |
283 | /* |
284 | * CMA uses CMA_MIN_ALIGNMENT_BYTES as alignment requirement which |
285 | * needs pageblock_order to be initialized. Let's enforce it. |
286 | */ |
287 | if (!pageblock_order) { |
288 | pr_err("pageblock_order not yet initialized. Called during early boot?\n"); |
289 | return -EINVAL; |
290 | } |
291 | |
292 | /* ensure minimal alignment required by mm core */ |
293 | if (!IS_ALIGNED(base | size, CMA_MIN_ALIGNMENT_BYTES)) |
294 | return -EINVAL; |
295 | |
296 | ret = cma_new_area(name, size, order_per_bit, &cma); |
297 | if (ret != 0) |
298 | return ret; |
299 | |
300 | cma->ranges[0].base_pfn = PFN_DOWN(base); |
301 | cma->ranges[0].early_pfn = PFN_DOWN(base); |
302 | cma->ranges[0].count = cma->count; |
303 | cma->nranges = 1; |
304 | cma->nid = NUMA_NO_NODE; |
305 | |
306 | *res_cma = cma; |
307 | |
308 | return 0; |
309 | } |
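/*
 * Minimal usage sketch for cma_init_reserved_mem() (hypothetical
 * caller, not part of this file): early setup code reserves a
 * pageblock-aligned region through memblock and then hands it to CMA.
 * "example_cma" and "example_cma_setup" are made-up names.
 *
 *	static struct cma *example_cma;
 *
 *	static int __init example_cma_setup(void)
 *	{
 *		phys_addr_t base;
 *
 *		base = memblock_phys_alloc(SZ_64M, CMA_MIN_ALIGNMENT_BYTES);
 *		if (!base)
 *			return -ENOMEM;
 *
 *		return cma_init_reserved_mem(base, SZ_64M, 0, "example",
 *					     &example_cma);
 *	}
 */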
310 | |
311 | /* |
312 | * Structure used while walking physical memory ranges and finding out |
313 | * which one(s) to use for a CMA area. |
314 | */ |
315 | struct cma_init_memrange { |
316 | phys_addr_t base; |
317 | phys_addr_t size; |
318 | struct list_head list; |
319 | }; |
320 | |
321 | /* |
322 | * Work array used during CMA initialization. |
323 | */ |
324 | static struct cma_init_memrange memranges[CMA_MAX_RANGES] __initdata; |
325 | |
326 | static bool __init revsizecmp(struct cma_init_memrange *mlp, |
327 | struct cma_init_memrange *mrp) |
328 | { |
329 | return mlp->size > mrp->size; |
330 | } |
331 | |
332 | static bool __init basecmp(struct cma_init_memrange *mlp, |
333 | struct cma_init_memrange *mrp) |
334 | { |
335 | return mlp->base < mrp->base; |
336 | } |
337 | |
338 | /* |
339 | * Helper function to create sorted lists. |
340 | */ |
341 | static void __init list_insert_sorted( |
342 | struct list_head *ranges, |
343 | struct cma_init_memrange *mrp, |
344 | bool (*cmp)(struct cma_init_memrange *lh, struct cma_init_memrange *rh)) |
345 | { |
346 | struct list_head *mp; |
347 | struct cma_init_memrange *mlp; |
348 | |
349 | if (list_empty(ranges)) |
350 | list_add(&mrp->list, ranges); |
351 | else { |
352 | list_for_each(mp, ranges) { |
353 | mlp = list_entry(mp, struct cma_init_memrange, list); |
354 | if (cmp(mlp, mrp)) |
355 | break; |
356 | } |
357 | __list_add(&mrp->list, mlp->list.prev, &mlp->list); |
358 | } |
359 | } |
360 | |
361 | /* |
362 | * Create CMA areas with a total size of @total_size. A normal allocation |
363 | * for one area is tried first. If that fails, the biggest memblock |
364 | * ranges above 4G are selected, and allocated bottom up. |
365 | * |
366 | * The complexity here is not great, but this function will only be |
367 | * called during boot, and the lists operated on have fewer than |
368 | * CMA_MAX_RANGES elements (default value: 8). |
369 | */ |
370 | int __init cma_declare_contiguous_multi(phys_addr_t total_size, |
371 | phys_addr_t align, unsigned int order_per_bit, |
372 | const char *name, struct cma **res_cma, int nid) |
373 | { |
374 | phys_addr_t start = 0, end; |
375 | phys_addr_t size, sizesum, sizeleft; |
376 | struct cma_init_memrange *mrp, *mlp, *failed; |
377 | struct cma_memrange *cmrp; |
378 | LIST_HEAD(ranges); |
379 | LIST_HEAD(final_ranges); |
380 | struct list_head *mp, *next; |
381 | int ret, nr = 1; |
382 | u64 i; |
383 | struct cma *cma; |
384 | |
385 | /* |
386 | * First, try it the normal way, producing just one range. |
387 | */ |
388 | ret = __cma_declare_contiguous_nid(&start, total_size, 0, align, |
389 | order_per_bit, false, name, res_cma, nid); |
390 | if (ret != -ENOMEM) |
391 | goto out; |
392 | |
393 | /* |
394 | * Couldn't find one range that fits our needs, so try multiple |
395 | * ranges. |
396 | * |
397 | * No need to do the alignment checks here, the call to |
398 | * __cma_declare_contiguous_nid above would have caught |
399 | * any issues. With the checks, we know that: |
400 | * |
401 | * - @align is a power of 2 |
402 | * - @align is >= pageblock alignment |
403 | * - @size is aligned to @align and to @order_per_bit |
404 | * |
405 | * So, as long as we create ranges that have a base |
406 | * aligned to @align, and a size that is aligned to |
407 | * both @align and @order_per_bit, things will work out. |
408 | */ |
409 | nr = 0; |
410 | sizesum = 0; |
411 | failed = NULL; |
412 | |
413 | ret = cma_new_area(name, total_size, order_per_bit, &cma); |
414 | if (ret != 0) |
415 | goto out; |
416 | |
417 | align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES); |
418 | /* |
419 | * Create a list of ranges above 4G, largest range first. |
420 | */ |
421 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) { |
422 | if (upper_32_bits(start) == 0) |
423 | continue; |
424 | |
425 | start = ALIGN(start, align); |
426 | if (start >= end) |
427 | continue; |
428 | |
429 | end = ALIGN_DOWN(end, align); |
430 | if (end <= start) |
431 | continue; |
432 | |
433 | size = end - start; |
434 | size = ALIGN_DOWN(size, (PAGE_SIZE << order_per_bit)); |
435 | if (!size) |
436 | continue; |
437 | sizesum += size; |
438 | |
439 | pr_debug("consider %016llx - %016llx\n", (u64)start, (u64)end); |
440 | |
441 | /* |
442 | * If we haven't yet used the maximum number of |
443 | * areas, grab a new one. |
444 | * |
445 | * Otherwise, see whether this range is at least as large as |
446 | * the smallest one recorded so far. If it is, drop the |
447 | * smallest element and re-use its slot. |
448 | */ |
449 | if (nr < CMA_MAX_RANGES) |
450 | mrp = &memranges[nr++]; |
451 | else { |
452 | mrp = list_last_entry(&ranges, |
453 | struct cma_init_memrange, list); |
454 | if (size < mrp->size) |
455 | continue; |
456 | list_del(&mrp->list); |
457 | sizesum -= mrp->size; |
458 | pr_debug("deleted %016llx - %016llx from the list\n", |
459 | (u64)mrp->base, (u64)mrp->base + size); |
460 | } |
461 | mrp->base = start; |
462 | mrp->size = size; |
463 | |
464 | /* |
465 | * Now do a sorted insert. |
466 | */ |
467 | list_insert_sorted(&ranges, mrp, revsizecmp); |
468 | pr_debug("added %016llx - %016llx to the list\n", |
469 | (u64)mrp->base, (u64)mrp->base + size); |
470 | pr_debug("total size now %llu\n", (u64)sizesum); |
471 | } |
472 | |
473 | /* |
474 | * There is not enough room in the CMA_MAX_RANGES largest |
475 | * ranges, so bail out. |
476 | */ |
477 | if (sizesum < total_size) { |
478 | cma_drop_area(cma); |
479 | ret = -ENOMEM; |
480 | goto out; |
481 | } |
482 | |
483 | /* |
484 | * Found ranges that provide enough combined space. |
485 | * Now sort them by address, smallest first, because we |
486 | * want to mimic a bottom-up memblock allocation. |
487 | */ |
488 | sizesum = 0; |
489 | list_for_each_safe(mp, next, &ranges) { |
490 | mlp = list_entry(mp, struct cma_init_memrange, list); |
491 | list_del(mp); |
492 | list_insert_sorted(&final_ranges, mlp, basecmp); |
493 | sizesum += mlp->size; |
494 | if (sizesum >= total_size) |
495 | break; |
496 | } |
497 | |
498 | /* |
499 | * Walk the final list, and add a CMA range for |
500 | * each range, possibly not using the last one fully. |
501 | */ |
502 | nr = 0; |
503 | sizeleft = total_size; |
504 | list_for_each(mp, &final_ranges) { |
505 | mlp = list_entry(mp, struct cma_init_memrange, list); |
506 | size = min(sizeleft, mlp->size); |
507 | if (memblock_reserve(mlp->base, size)) { |
508 | /* |
509 | * Unexpected error. Could go on to |
510 | * the next one, but just abort to |
511 | * be safe. |
512 | */ |
513 | failed = mlp; |
514 | break; |
515 | } |
516 | |
517 | pr_debug("created region %d: %016llx - %016llx\n", |
518 | nr, (u64)mlp->base, (u64)mlp->base + size); |
519 | cmrp = &cma->ranges[nr++]; |
520 | cmrp->base_pfn = PHYS_PFN(mlp->base); |
521 | cmrp->early_pfn = cmrp->base_pfn; |
522 | cmrp->count = size >> PAGE_SHIFT; |
523 | |
524 | sizeleft -= size; |
525 | if (sizeleft == 0) |
526 | break; |
527 | } |
528 | |
529 | if (failed) { |
530 | list_for_each(mp, &final_ranges) { |
531 | mlp = list_entry(mp, struct cma_init_memrange, list); |
532 | if (mlp == failed) |
533 | break; |
534 | memblock_phys_free(mlp->base, mlp->size); |
535 | } |
536 | cma_drop_area(cma); |
537 | ret = -ENOMEM; |
538 | goto out; |
539 | } |
540 | |
541 | cma->nranges = nr; |
542 | cma->nid = nid; |
543 | *res_cma = cma; |
544 | |
545 | out: |
546 | if (ret != 0) |
547 | pr_err("Failed to reserve %lu MiB\n", |
548 | (unsigned long)total_size / SZ_1M); |
549 | else |
550 | pr_info("Reserved %lu MiB in %d range%s\n", |
551 | (unsigned long)total_size / SZ_1M, nr, |
552 | nr > 1 ? "s": ""); |
553 | return ret; |
554 | } |
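/*
 * Usage sketch for cma_declare_contiguous_multi() (hypothetical caller
 * and sizes): reserving a large pool that may be satisfied from up to
 * CMA_MAX_RANGES physical ranges above 4G if a single contiguous
 * reservation fails.
 *
 *	static struct cma *big_pool;
 *
 *	void __init reserve_big_pool(void)
 *	{
 *		if (cma_declare_contiguous_multi(SZ_2G, 0, 0, "big_pool",
 *						 &big_pool, NUMA_NO_NODE))
 *			pr_warn("big_pool: CMA reservation failed\n");
 *	}
 */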
555 | |
556 | /** |
557 | * cma_declare_contiguous_nid() - reserve custom contiguous area |
558 | * @base: Base address of the reserved area (optional, use 0 for any) |
559 | * @size: Size of the reserved area (in bytes). |
560 | * @limit: End address of the reserved memory (optional, 0 for any). |
561 | * @alignment: Alignment for the CMA area, should be power of 2 or zero |
562 | * @order_per_bit: Order of pages represented by one bit on bitmap. |
563 | * @fixed: hint about where to place the reserved area |
564 | * @name: The name of the area. See function cma_init_reserved_mem() |
565 | * @res_cma: Pointer to store the created cma region. |
566 | * @nid: nid of the free area to find, %NUMA_NO_NODE for any node |
567 | * |
568 | * This function reserves memory from the early allocator. It should be |
569 | * called by arch-specific code once the early allocator (memblock or bootmem) |
570 | * has been activated and all other subsystems have already allocated/reserved |
571 | * memory. This function allows the creation of custom reserved areas. |
572 | * |
573 | * If @fixed is true, reserve contiguous area at exactly @base. If false, |
574 | * reserve in range from @base to @limit. |
575 | */ |
576 | int __init cma_declare_contiguous_nid(phys_addr_t base, |
577 | phys_addr_t size, phys_addr_t limit, |
578 | phys_addr_t alignment, unsigned int order_per_bit, |
579 | bool fixed, const char *name, struct cma **res_cma, |
580 | int nid) |
581 | { |
582 | int ret; |
583 | |
584 | ret = __cma_declare_contiguous_nid(&base, size, limit, alignment, |
585 | order_per_bit, fixed, name, res_cma, nid); |
586 | if (ret != 0) |
587 | pr_err("Failed to reserve %ld MiB\n", |
588 | (unsigned long)size / SZ_1M); |
589 | else |
590 | pr_info("Reserved %ld MiB at %pa\n", |
591 | (unsigned long)size / SZ_1M, &base); |
592 | |
593 | return ret; |
594 | } |
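/*
 * Usage sketch for cma_declare_contiguous_nid() (hypothetical caller,
 * illustrative values): reserve 64 MiB anywhere below 4 GiB, on any
 * node, without a fixed base address.
 *
 *	static struct cma *dma_pool;
 *
 *	void __init reserve_dma_pool(void)
 *	{
 *		if (cma_declare_contiguous_nid(0, SZ_64M, SZ_4G, 0, 0, false,
 *					       "dma_pool", &dma_pool,
 *					       NUMA_NO_NODE))
 *			pr_warn("dma_pool: CMA reservation failed\n");
 *	}
 */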
595 | |
596 | static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, |
597 | phys_addr_t size, phys_addr_t limit, |
598 | phys_addr_t alignment, unsigned int order_per_bit, |
599 | bool fixed, const char *name, struct cma **res_cma, |
600 | int nid) |
601 | { |
602 | phys_addr_t memblock_end = memblock_end_of_DRAM(); |
603 | phys_addr_t highmem_start, base = *basep; |
604 | int ret; |
605 | |
606 | /* |
607 | * We can't use __pa(high_memory) directly, since high_memory |
608 | * isn't a valid direct map VA, and DEBUG_VIRTUAL will (validly) |
609 | * complain. Find the boundary by adding one to the last valid |
610 | * address. |
611 | */ |
612 | if (IS_ENABLED(CONFIG_HIGHMEM)) |
613 | highmem_start = __pa(high_memory - 1) + 1; |
614 | else |
615 | highmem_start = memblock_end_of_DRAM(); |
616 | pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n", |
617 | __func__, &size, &base, &limit, &alignment); |
618 | |
619 | if (cma_area_count == ARRAY_SIZE(cma_areas)) { |
620 | pr_err("Not enough slots for CMA reserved regions!\n"); |
621 | return -ENOSPC; |
622 | } |
623 | |
624 | if (!size) |
625 | return -EINVAL; |
626 | |
627 | if (alignment && !is_power_of_2(alignment)) |
628 | return -EINVAL; |
629 | |
630 | if (!IS_ENABLED(CONFIG_NUMA)) |
631 | nid = NUMA_NO_NODE; |
632 | |
633 | /* Sanitise input arguments. */ |
634 | alignment = max_t(phys_addr_t, alignment, CMA_MIN_ALIGNMENT_BYTES); |
635 | if (fixed && base & (alignment - 1)) { |
636 | pr_err("Region at %pa must be aligned to %pa bytes\n", |
637 | &base, &alignment); |
638 | return -EINVAL; |
639 | } |
640 | base = ALIGN(base, alignment); |
641 | size = ALIGN(size, alignment); |
642 | limit &= ~(alignment - 1); |
643 | |
644 | if (!base) |
645 | fixed = false; |
646 | |
647 | /* size should be aligned with order_per_bit */ |
648 | if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) |
649 | return -EINVAL; |
650 | |
651 | /* |
652 | * If allocating at a fixed base the requested region must not cross the |
653 | * low/high memory boundary. |
654 | */ |
655 | if (fixed && base < highmem_start && base + size > highmem_start) { |
656 | pr_err("Region at %pa defined on low/high memory boundary (%pa)\n", |
657 | &base, &highmem_start); |
658 | return -EINVAL; |
659 | } |
660 | |
661 | /* |
662 | * If the limit is unspecified or above the memblock end, its effective |
663 | * value will be the memblock end. Set it explicitly to simplify further |
664 | * checks. |
665 | */ |
666 | if (limit == 0 || limit > memblock_end) |
667 | limit = memblock_end; |
668 | |
669 | if (base + size > limit) { |
670 | pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n", |
671 | &size, &base, &limit); |
672 | return -EINVAL; |
673 | } |
674 | |
675 | /* Reserve memory */ |
676 | if (fixed) { |
677 | if (memblock_is_region_reserved(base, size) || |
678 | memblock_reserve(base, size) < 0) { |
679 | return -EBUSY; |
680 | } |
681 | } else { |
682 | phys_addr_t addr = 0; |
683 | |
684 | /* |
685 | * If there is enough memory, try a bottom-up allocation first. |
686 | * It will place the new cma area close to the start of the node |
687 | * and guarantee that the compaction is moving pages out of the |
688 | * cma area and not into it. |
689 | * Avoid using the first 4GB so as not to interfere with constrained zones |
690 | * like DMA/DMA32. |
691 | */ |
692 | #ifdef CONFIG_PHYS_ADDR_T_64BIT |
693 | if (!memblock_bottom_up() && memblock_end >= SZ_4G + size) { |
694 | memblock_set_bottom_up(true); |
695 | addr = memblock_alloc_range_nid(size, alignment, SZ_4G, |
696 | limit, nid, true); |
697 | memblock_set_bottom_up(false); |
698 | } |
699 | #endif |
700 | |
701 | /* |
702 | * All pages in the reserved area must come from the same zone. |
703 | * If the requested region crosses the low/high memory boundary, |
704 | * try allocating from high memory first and fall back to low |
705 | * memory in case of failure. |
706 | */ |
707 | if (!addr && base < highmem_start && limit > highmem_start) { |
708 | addr = memblock_alloc_range_nid(size, alignment, |
709 | highmem_start, limit, nid, true); |
710 | limit = highmem_start; |
711 | } |
712 | |
713 | if (!addr) { |
714 | addr = memblock_alloc_range_nid(size, alignment, base, |
715 | limit, nid, true); |
716 | if (!addr) |
717 | return -ENOMEM; |
718 | } |
719 | |
720 | /* |
721 | * kmemleak scans/reads tracked objects for pointers to other |
722 | * objects but this address isn't mapped and accessible |
723 | */ |
724 | kmemleak_ignore_phys(addr); |
725 | base = addr; |
726 | } |
727 | |
728 | ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma); |
729 | if (ret) { |
730 | memblock_phys_free(base, size); |
731 | return ret; |
732 | } |
733 | |
734 | (*res_cma)->nid = nid; |
735 | *basep = base; |
736 | |
737 | return 0; |
738 | } |
739 | |
740 | static void cma_debug_show_areas(struct cma *cma) |
741 | { |
742 | unsigned long next_zero_bit, next_set_bit, nr_zero; |
743 | unsigned long start; |
744 | unsigned long nr_part; |
745 | unsigned long nbits; |
746 | int r; |
747 | struct cma_memrange *cmr; |
748 | |
749 | spin_lock_irq(&cma->lock); |
750 | pr_info("number of available pages: "); |
751 | for (r = 0; r < cma->nranges; r++) { |
752 | cmr = &cma->ranges[r]; |
753 | |
754 | start = 0; |
755 | nbits = cma_bitmap_maxno(cma, cmr); |
756 | |
757 | pr_info("range %d: ", r); |
758 | for (;;) { |
759 | next_zero_bit = find_next_zero_bit(cmr->bitmap, |
760 | nbits, start); |
761 | if (next_zero_bit >= nbits) |
762 | break; |
763 | next_set_bit = find_next_bit(cmr->bitmap, nbits, |
764 | next_zero_bit); |
765 | nr_zero = next_set_bit - next_zero_bit; |
766 | nr_part = nr_zero << cma->order_per_bit; |
767 | pr_cont("%s%lu@%lu", start ? "+": "", nr_part, |
768 | next_zero_bit); |
769 | start = next_zero_bit + nr_zero; |
770 | } |
771 | pr_info("\n"); |
772 | } |
773 | pr_cont("=> %lu free of %lu total pages\n", cma->available_count, |
774 | cma->count); |
775 | spin_unlock_irq(&cma->lock); |
776 | } |
777 | |
778 | static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr, |
779 | unsigned long count, unsigned int align, |
780 | struct page **pagep, gfp_t gfp) |
781 | { |
782 | unsigned long mask, offset; |
783 | unsigned long pfn = -1; |
784 | unsigned long start = 0; |
785 | unsigned long bitmap_maxno, bitmap_no, bitmap_count; |
786 | int ret = -EBUSY; |
787 | struct page *page = NULL; |
788 | |
789 | mask = cma_bitmap_aligned_mask(cma, align); |
790 | offset = cma_bitmap_aligned_offset(cma, cmr, align); |
791 | bitmap_maxno = cma_bitmap_maxno(cma, cmr); |
792 | bitmap_count = cma_bitmap_pages_to_bits(cma, count); |
793 | |
794 | if (bitmap_count > bitmap_maxno) |
795 | goto out; |
796 | |
797 | for (;;) { |
798 | spin_lock_irq(&cma->lock); |
799 | /* |
800 | * If the request is larger than the available number |
801 | * of pages, stop right away. |
802 | */ |
803 | if (count > cma->available_count) { |
804 | spin_unlock_irq(&cma->lock); |
805 | break; |
806 | } |
807 | bitmap_no = bitmap_find_next_zero_area_off(cmr->bitmap, |
808 | bitmap_maxno, start, bitmap_count, mask, |
809 | offset); |
810 | if (bitmap_no >= bitmap_maxno) { |
811 | spin_unlock_irq(&cma->lock); |
812 | break; |
813 | } |
814 | bitmap_set(cmr->bitmap, bitmap_no, bitmap_count); |
815 | cma->available_count -= count; |
816 | /* |
817 | * It's safe to drop the lock here. We've marked this region for |
818 | * our exclusive use. If the migration fails we will take the |
819 | * lock again and unmark it. |
820 | */ |
821 | spin_unlock_irq(&cma->lock); |
822 | |
823 | pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit); |
824 | mutex_lock(&cma->alloc_mutex); |
825 | ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, gfp); |
826 | mutex_unlock(&cma->alloc_mutex); |
827 | if (ret == 0) { |
828 | page = pfn_to_page(pfn); |
829 | break; |
830 | } |
831 | |
832 | cma_clear_bitmap(cma, cmr, pfn, count); |
833 | if (ret != -EBUSY) |
834 | break; |
835 | |
836 | pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n", |
837 | __func__, pfn, pfn_to_page(pfn)); |
838 | |
839 | trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn), |
840 | count, align); |
841 | /* try again with a bit different memory target */ |
842 | start = bitmap_no + mask + 1; |
843 | } |
844 | out: |
845 | *pagep = page; |
846 | return ret; |
847 | } |
848 | |
849 | static struct page *__cma_alloc(struct cma *cma, unsigned long count, |
850 | unsigned int align, gfp_t gfp) |
851 | { |
852 | struct page *page = NULL; |
853 | int ret = -ENOMEM, r; |
854 | unsigned long i; |
855 | const char *name = cma ? cma->name : NULL; |
856 | |
857 | trace_cma_alloc_start(name, count, align); |
858 | |
859 | if (!cma || !cma->count) |
860 | return page; |
861 | |
862 | pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__, |
863 | (void *)cma, cma->name, count, align); |
864 | |
865 | if (!count) |
866 | return page; |
867 | |
868 | for (r = 0; r < cma->nranges; r++) { |
869 | page = NULL; |
870 | |
871 | ret = cma_range_alloc(cma, &cma->ranges[r], count, align, |
872 | &page, gfp); |
873 | if (ret != -EBUSY || page) |
874 | break; |
875 | } |
876 | |
877 | /* |
878 | * CMA can allocate multiple page blocks, which results in different |
879 | * blocks being marked with different tags. Reset the tags to ignore |
880 | * those page blocks. |
881 | */ |
882 | if (page) { |
883 | for (i = 0; i < count; i++) |
884 | page_kasan_tag_reset(nth_page(page, i)); |
885 | } |
886 | |
887 | if (ret && !(gfp & __GFP_NOWARN)) { |
888 | pr_err_ratelimited("%s: %s: alloc failed, req-size: %lu pages, ret: %d\n", |
889 | __func__, cma->name, count, ret); |
890 | cma_debug_show_areas(cma); |
891 | } |
892 | |
893 | pr_debug("%s(): returned %p\n", __func__, page); |
894 | trace_cma_alloc_finish(name, page ? page_to_pfn(page) : 0, |
895 | page, count, align, ret); |
896 | if (page) { |
897 | count_vm_event(CMA_ALLOC_SUCCESS); |
898 | cma_sysfs_account_success_pages(cma, count); |
899 | } else { |
900 | count_vm_event(CMA_ALLOC_FAIL); |
901 | cma_sysfs_account_fail_pages(cma, count); |
902 | } |
903 | |
904 | return page; |
905 | } |
906 | |
907 | /** |
908 | * cma_alloc() - allocate pages from contiguous area |
909 | * @cma: Contiguous memory region for which the allocation is performed. |
910 | * @count: Requested number of pages. |
911 | * @align: Requested alignment of pages (in PAGE_SIZE order). |
912 | * @no_warn: Avoid printing message about failed allocation |
913 | * |
914 | * This function allocates pages from the specified contiguous |
915 | * memory area. |
916 | */ |
917 | struct page *cma_alloc(struct cma *cma, unsigned long count, |
918 | unsigned int align, bool no_warn) |
919 | { |
920 | return __cma_alloc(cma, count, align, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0)); |
921 | } |
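/*
 * Usage sketch for cma_alloc()/cma_release() (hypothetical driver
 * code; "my_cma" is a made-up pointer obtained from one of the
 * declaration helpers above): allocate and later free a 1 MiB buffer.
 *
 *	unsigned long nr_pages = SZ_1M >> PAGE_SHIFT;
 *	struct page *page;
 *
 *	page = cma_alloc(my_cma, nr_pages, 0, false);
 *	if (!page)
 *		return -ENOMEM;
 *	...
 *	cma_release(my_cma, page, nr_pages);
 */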
922 | |
923 | struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp) |
924 | { |
925 | struct page *page; |
926 | |
927 | if (WARN_ON(!order || !(gfp & __GFP_COMP))) |
928 | return NULL; |
929 | |
930 | page = __cma_alloc(cma, 1 << order, order, gfp); |
931 | |
932 | return page ? page_folio(page) : NULL; |
933 | } |
934 | |
935 | bool cma_pages_valid(struct cma *cma, const struct page *pages, |
936 | unsigned long count) |
937 | { |
938 | unsigned long pfn, end; |
939 | int r; |
940 | struct cma_memrange *cmr; |
941 | bool ret; |
942 | |
943 | if (!cma || !pages || count > cma->count) |
944 | return false; |
945 | |
946 | pfn = page_to_pfn(pages); |
947 | ret = false; |
948 | |
949 | for (r = 0; r < cma->nranges; r++) { |
950 | cmr = &cma->ranges[r]; |
951 | end = cmr->base_pfn + cmr->count; |
952 | if (pfn >= cmr->base_pfn && pfn < end) { |
953 | ret = pfn + count <= end; |
954 | break; |
955 | } |
956 | } |
957 | |
958 | if (!ret) |
959 | pr_debug("%s(page %p, count %lu)\n", |
960 | __func__, (void *)pages, count); |
961 | |
962 | return ret; |
963 | } |
964 | |
965 | /** |
966 | * cma_release() - release allocated pages |
967 | * @cma: Contiguous memory region for which the allocation is performed. |
968 | * @pages: Allocated pages. |
969 | * @count: Number of allocated pages. |
970 | * |
971 | * This function releases memory allocated by cma_alloc(). |
972 | * It returns false when the provided pages do not belong to the |
973 | * contiguous area, and true otherwise. |
974 | */ |
975 | bool cma_release(struct cma *cma, const struct page *pages, |
976 | unsigned long count) |
977 | { |
978 | struct cma_memrange *cmr; |
979 | unsigned long pfn, end_pfn; |
980 | int r; |
981 | |
982 | pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count); |
983 | |
984 | if (!cma_pages_valid(cma, pages, count)) |
985 | return false; |
986 | |
987 | pfn = page_to_pfn(pages); |
988 | end_pfn = pfn + count; |
989 | |
990 | for (r = 0; r < cma->nranges; r++) { |
991 | cmr = &cma->ranges[r]; |
992 | if (pfn >= cmr->base_pfn && |
993 | pfn < (cmr->base_pfn + cmr->count)) { |
994 | VM_BUG_ON(end_pfn > cmr->base_pfn + cmr->count); |
995 | break; |
996 | } |
997 | } |
998 | |
999 | if (r == cma->nranges) |
1000 | return false; |
1001 | |
1002 | free_contig_range(pfn, count); |
1003 | cma_clear_bitmap(cma, cmr, pfn, count); |
1004 | cma_sysfs_account_release_pages(cma, count); |
1005 | trace_cma_release(cma->name, pfn, pages, count); |
1006 | |
1007 | return true; |
1008 | } |
1009 | |
1010 | bool cma_free_folio(struct cma *cma, const struct folio *folio) |
1011 | { |
1012 | if (WARN_ON(!folio_test_large(folio))) |
1013 | return false; |
1014 | |
1015 | return cma_release(cma, &folio->page, folio_nr_pages(folio)); |
1016 | } |
1017 | |
1018 | int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data) |
1019 | { |
1020 | int i; |
1021 | |
1022 | for (i = 0; i < cma_area_count; i++) { |
1023 | int ret = it(&cma_areas[i], data); |
1024 | |
1025 | if (ret) |
1026 | return ret; |
1027 | } |
1028 | |
1029 | return 0; |
1030 | } |
1031 | |
1032 | bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end) |
1033 | { |
1034 | int r; |
1035 | struct cma_memrange *cmr; |
1036 | unsigned long rstart, rend; |
1037 | |
1038 | for (r = 0; r < cma->nranges; r++) { |
1039 | cmr = &cma->ranges[r]; |
1040 | |
1041 | rstart = PFN_PHYS(cmr->base_pfn); |
1042 | rend = PFN_PHYS(cmr->base_pfn + cmr->count); |
1043 | if (end < rstart) |
1044 | continue; |
1045 | if (start >= rend) |
1046 | continue; |
1047 | return true; |
1048 | } |
1049 | |
1050 | return false; |
1051 | } |
1052 | |
1053 | /* |
1054 | * Very basic function to reserve memory from a CMA area that has not |
1055 | * yet been activated. This is expected to be called early, when the |
1056 | * system is single-threaded, so there is no locking. The alignment |
1057 | * checking is restrictive - only pageblock-aligned areas |
1058 | * (CMA_MIN_ALIGNMENT_BYTES) may be reserved through this function. |
1059 | * This keeps things simple, and is enough for the current use case. |
1060 | * |
1061 | * The CMA bitmaps have not yet been allocated, so just start |
1062 | * reserving from the bottom up, using a PFN to keep track |
1063 | * of what has been reserved. Unreserving is not possible. |
1064 | * |
1065 | * The caller is responsible for initializing the page structures |
1066 | * in the area properly, since this just points to memblock-allocated |
1067 | * memory. The caller should subsequently use init_cma_pageblock to |
1068 | * set the migrate type and CMA stats for the pageblocks that were reserved. |
1069 | * |
1070 | * If the CMA area fails to activate later, memory obtained through |
1071 | * this interface is not handed to the page allocator, this is |
1072 | * the responsibility of the caller (e.g. like normal memblock-allocated |
1073 | * memory). |
1074 | */ |
1075 | void __init *cma_reserve_early(struct cma *cma, unsigned long size) |
1076 | { |
1077 | int r; |
1078 | struct cma_memrange *cmr; |
1079 | unsigned long available; |
1080 | void *ret = NULL; |
1081 | |
1082 | if (!cma || !cma->count) |
1083 | return NULL; |
1084 | /* |
1085 | * Can only be called early in init. |
1086 | */ |
1087 | if (test_bit(CMA_ACTIVATED, &cma->flags)) |
1088 | return NULL; |
1089 | |
1090 | if (!IS_ALIGNED(size, CMA_MIN_ALIGNMENT_BYTES)) |
1091 | return NULL; |
1092 | |
1093 | if (!IS_ALIGNED(size, (PAGE_SIZE << cma->order_per_bit))) |
1094 | return NULL; |
1095 | |
1096 | size >>= PAGE_SHIFT; |
1097 | |
1098 | if (size > cma->available_count) |
1099 | return NULL; |
1100 | |
1101 | for (r = 0; r < cma->nranges; r++) { |
1102 | cmr = &cma->ranges[r]; |
1103 | available = cmr->count - (cmr->early_pfn - cmr->base_pfn); |
1104 | if (size <= available) { |
1105 | ret = phys_to_virt(PFN_PHYS(cmr->early_pfn)); |
1106 | cmr->early_pfn += size; |
1107 | cma->available_count -= size; |
1108 | return ret; |
1109 | } |
1110 | } |
1111 | |
1112 | return ret; |
1113 | } |
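/*
 * Usage sketch for cma_reserve_early() (hypothetical caller,
 * illustrative size): carve a pageblock-aligned chunk out of a CMA
 * area that has not been activated yet, e.g. for boot-time metadata.
 * As noted above, the caller must initialize the page structures for
 * the reserved pageblocks itself (via init_cma_pageblock()).
 *
 *	void *buf = cma_reserve_early(my_cma, SZ_4M);
 *
 *	if (!buf)
 *		pr_warn("early CMA reservation failed\n");
 */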
1114 |