// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/page_isolation.c
 */

#include <linux/mm.h>
#include <linux/page-isolation.h>
#include <linux/pageblock-flags.h>
#include <linux/memory.h>
#include <linux/hugetlb.h>
#include <linux/page_owner.h>
#include <linux/migrate.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/page_isolation.h>

/*
 * This function checks whether the range [start_pfn, end_pfn) includes
 * unmovable pages or not. The range must fall into a single pageblock and
 * consequently belong to a single zone.
 *
 * A PageLRU check without isolation or lru_lock could race, so a
 * MIGRATE_MOVABLE block might include unmovable pages. Likewise, a
 * __PageMovable check without lock_page may miss some movable non-LRU
 * pages in a race. So this function is not expected to be exact.
 *
 * Returns a page without holding a reference. If the caller wants to
 * dereference that page (e.g., dumping), it has to make sure that it
 * cannot get removed (e.g., via memory unplug) concurrently.
 */
static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long end_pfn,
				int migratetype, int flags)
{
	struct page *page = pfn_to_page(start_pfn);
	struct zone *zone = page_zone(page);
	unsigned long pfn;

	VM_BUG_ON(pageblock_start_pfn(start_pfn) !=
		  pageblock_start_pfn(end_pfn - 1));

	if (is_migrate_cma_page(page)) {
		/*
		 * CMA allocations (alloc_contig_range) really need to mark
		 * CMA pageblocks isolated even when they are not movable in
		 * fact, so consider them movable here.
		 */
		if (is_migrate_cma(migratetype))
			return NULL;

		return page;
	}

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		page = pfn_to_page(pfn);

		/*
		 * Both bootmem allocations and memory holes are marked
		 * PG_reserved and are unmovable. We can even have unmovable
		 * allocations inside ZONE_MOVABLE, for example when
		 * specifying "movablecore".
		 */
		if (PageReserved(page))
			return page;

		/*
		 * If the zone is movable and we have ruled out all reserved
		 * pages then it should be reasonably safe to assume the rest
		 * is movable.
		 */
		if (zone_idx(zone) == ZONE_MOVABLE)
			continue;

		/*
		 * Hugepages are not in LRU lists, but they're movable.
		 * THPs are on the LRU, but need to be counted as individual
		 * small pages. We need not scan over tail pages because we
		 * don't handle each tail page individually in migration.
		 */
		if (PageHuge(page) || PageTransCompound(page)) {
			struct folio *folio = page_folio(page);
			unsigned int skip_pages;

			if (PageHuge(page)) {
				if (!hugepage_migration_supported(folio_hstate(folio)))
					return page;
			} else if (!folio_test_lru(folio) && !__folio_test_movable(folio)) {
				return page;
			}

			skip_pages = folio_nr_pages(folio) - folio_page_idx(folio, page);
			pfn += skip_pages - 1;
			continue;
		}

		/*
		 * We can't use page_count() without pinning the page,
		 * because another CPU can free the compound page.
		 * This check already skips compound tails of THP
		 * because their page->_refcount is zero at all times.
		 */
		if (!page_ref_count(page)) {
			if (PageBuddy(page))
				pfn += (1 << buddy_order(page)) - 1;
			continue;
		}

		/*
		 * A HWPoisoned page may not be in the buddy system,
		 * and page_count() is not 0.
		 */
		if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
			continue;

		/*
		 * We treat all PageOffline() pages as movable when offlining
		 * to give drivers a chance to decrement their reference count
		 * in MEM_GOING_OFFLINE in order to indicate that these pages
		 * can be offlined as there are no direct references anymore.
		 * For actually unmovable PageOffline() where the driver does
		 * not support this, we will fail later when trying to actually
		 * move these pages that still have a reference count > 0.
		 * (false negatives in this function only)
		 */
		if ((flags & MEMORY_OFFLINE) && PageOffline(page))
			continue;

		if (__PageMovable(page) || PageLRU(page))
			continue;

		/*
		 * If there are RECLAIMABLE pages, we need to check them.
		 * But for now, memory offline itself doesn't call
		 * shrink_node_slabs() and this still needs to be fixed.
		 */
		return page;
	}
	return NULL;
}

/*
 * This function sets the pageblock migratetype to MIGRATE_ISOLATE if no
 * unmovable page is present in [start_pfn, end_pfn). The pageblock must
 * intersect with [start_pfn, end_pfn).
 */
static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags,
			unsigned long start_pfn, unsigned long end_pfn)
{
	struct zone *zone = page_zone(page);
	struct page *unmovable;
	unsigned long flags;
	unsigned long check_unmovable_start, check_unmovable_end;

	spin_lock_irqsave(&zone->lock, flags);

	/*
	 * We assume the caller intended to SET migrate type to isolate.
	 * If it is already set, then someone else must have raced and
	 * set it before us.
	 */
	if (is_migrate_isolate_page(page)) {
		spin_unlock_irqrestore(&zone->lock, flags);
		return -EBUSY;
	}

	/*
	 * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
	 * We just check MOVABLE pages.
	 *
	 * Pass the intersection of [start_pfn, end_pfn) and the page's pageblock
	 * to avoid redundant checks.
	 */
	check_unmovable_start = max(page_to_pfn(page), start_pfn);
	check_unmovable_end = min(pageblock_end_pfn(page_to_pfn(page)),
				  end_pfn);

	unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end,
					migratetype, isol_flags);
	if (!unmovable) {
		unsigned long nr_pages;
		int mt = get_pageblock_migratetype(page);

		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
		zone->nr_isolate_pageblock++;
		nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE,
						NULL);

		__mod_zone_freepage_state(zone, -nr_pages, mt);
		spin_unlock_irqrestore(&zone->lock, flags);
		return 0;
	}

	spin_unlock_irqrestore(&zone->lock, flags);
	if (isol_flags & REPORT_FAILURE) {
		/*
		 * printk() with zone->lock held will likely trigger a
		 * lockdep splat, so defer it here.
		 */
		dump_page(unmovable, "unmovable page");
	}

	return -EBUSY;
}

static void unset_migratetype_isolate(struct page *page, int migratetype)
{
	struct zone *zone;
	unsigned long flags, nr_pages;
	bool isolated_page = false;
	unsigned int order;
	struct page *buddy;

	zone = page_zone(page);
	spin_lock_irqsave(&zone->lock, flags);
	if (!is_migrate_isolate_page(page))
		goto out;

	/*
	 * Because a free page with more than pageblock_order on an isolated
	 * pageblock is restricted from merging due to the freepage counting
	 * problem, it is possible that there is a free buddy page.
	 * move_freepages_block() doesn't care about merging, so we need
	 * another approach to merge them. Isolating and freeing will make
	 * these pages merge.
	 */
	if (PageBuddy(page)) {
		order = buddy_order(page);
		if (order >= pageblock_order && order < MAX_ORDER) {
			buddy = find_buddy_page_pfn(page, page_to_pfn(page),
						    order, NULL);
			if (buddy && !is_migrate_isolate_page(buddy)) {
				isolated_page = !!__isolate_free_page(page, order);
				/*
				 * Isolating a free page in an isolated pageblock
				 * is expected to always work as watermarks don't
				 * apply here.
				 */
				VM_WARN_ON(!isolated_page);
			}
		}
	}

	/*
	 * If we isolated a free page of more than pageblock_order, there
	 * should be no free page left in the range, so we can avoid the
	 * costly pageblock scanning for freepage moving.
	 *
	 * We didn't actually touch any of the isolated pages, so place them
	 * to the tail of the freelist. This is an optimization for memory
	 * onlining - just onlined memory won't immediately be considered for
	 * allocation.
	 */
	if (!isolated_page) {
		nr_pages = move_freepages_block(zone, page, migratetype, NULL);
		__mod_zone_freepage_state(zone, nr_pages, migratetype);
	}
	set_pageblock_migratetype(page, migratetype);
	if (isolated_page)
		__putback_isolated_page(page, order, migratetype);
	zone->nr_isolate_pageblock--;
out:
	spin_unlock_irqrestore(&zone->lock, flags);
}

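/*
 * Return the first online page in [pfn, pfn + nr_pages), or NULL if the
 * whole range lies in a memory hole or in sections that are not online.
 */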
static inline struct page *
__first_valid_page(unsigned long pfn, unsigned long nr_pages)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
		struct page *page;

		page = pfn_to_online_page(pfn + i);
		if (!page)
			continue;
		return page;
	}
	return NULL;
}

/**
 * isolate_single_pageblock() -- tries to isolate a pageblock that might be
 * within a free or in-use page.
 * @boundary_pfn:	pageblock-aligned pfn that a page might cross
 * @flags:		isolation flags
 * @gfp_flags:		GFP flags used for migrating pages
 * @isolate_before:	isolate the pageblock before the boundary_pfn
 * @skip_isolation:	the flag to skip the pageblock isolation in second
 *			isolate_single_pageblock()
 * @migratetype:	migrate type to set in error recovery.
 *
 * Free and in-use pages can be as big as MAX_ORDER and contain more than one
 * pageblock. When not all pageblocks within a page are isolated at the same
 * time, free page accounting can go wrong. For example, in the case of
 * MAX_ORDER = pageblock_order + 1, a MAX_ORDER page has two pageblocks.
 *
 *	[        MAX_ORDER        ]
 *	[ pageblock0 | pageblock1 ]
 *
 * When either pageblock is isolated, if it is a free page, the page is not
 * split into separate migratetype lists as it should be; if it is an in-use
 * page and freed later, __free_one_page() does not split the free page
 * either. The function handles this by splitting the free page or migrating
 * the in-use page then splitting the free page.
 */
static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
			gfp_t gfp_flags, bool isolate_before, bool skip_isolation,
			int migratetype)
{
	unsigned long start_pfn;
	unsigned long isolate_pageblock;
	unsigned long pfn;
	struct zone *zone;
	int ret;

	VM_BUG_ON(!pageblock_aligned(boundary_pfn));

	if (isolate_before)
		isolate_pageblock = boundary_pfn - pageblock_nr_pages;
	else
		isolate_pageblock = boundary_pfn;

	/*
	 * scan at the beginning of MAX_ORDER_NR_PAGES aligned range to avoid
	 * only isolating a subset of pageblocks from a bigger than pageblock
	 * free or in-use page. Also make sure all to-be-isolated pageblocks
	 * are within the same zone.
	 */
	zone = page_zone(pfn_to_page(isolate_pageblock));
	start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
			zone->zone_start_pfn);

	if (skip_isolation) {
		int mt __maybe_unused = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));

		VM_BUG_ON(!is_migrate_isolate(mt));
	} else {
		ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype,
				flags, isolate_pageblock, isolate_pageblock + pageblock_nr_pages);

		if (ret)
			return ret;
	}

	/*
	 * Bail out early when the to-be-isolated pageblock does not form
	 * a free or in-use page across boundary_pfn:
	 *
	 * 1. isolate before boundary_pfn: the page after is not online
	 * 2. isolate after boundary_pfn: the page before is not online
	 *
	 * This also ensures correctness. Without it, when isolate after
	 * boundary_pfn and [start_pfn, boundary_pfn) are not online,
	 * __first_valid_page() will return unexpected NULL in the for loop
	 * below.
	 */
	if (isolate_before) {
		if (!pfn_to_online_page(boundary_pfn))
			return 0;
	} else {
		if (!pfn_to_online_page(boundary_pfn - 1))
			return 0;
	}

	for (pfn = start_pfn; pfn < boundary_pfn;) {
		struct page *page = __first_valid_page(pfn, boundary_pfn - pfn);

		VM_BUG_ON(!page);
		pfn = page_to_pfn(page);
		/*
		 * start_pfn is MAX_ORDER_NR_PAGES aligned; if there are any
		 * free pages in [start_pfn, boundary_pfn), their head pages
		 * will always be in the range.
		 */
		if (PageBuddy(page)) {
			int order = buddy_order(page);

			if (pfn + (1UL << order) > boundary_pfn) {
				/* free page changed before split, check it again */
				if (split_free_page(page, order, boundary_pfn - pfn))
					continue;
			}

			pfn += 1UL << order;
			continue;
		}
		/*
		 * migrate compound pages then let the free page handling code
		 * above do the rest. If migration is not possible, just fail.
		 */
		if (PageCompound(page)) {
			struct page *head = compound_head(page);
			unsigned long head_pfn = page_to_pfn(head);
			unsigned long nr_pages = compound_nr(head);

			if (head_pfn + nr_pages <= boundary_pfn) {
				pfn = head_pfn + nr_pages;
				continue;
			}
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
			/*
			 * hugetlb, lru compound (THP), and movable compound pages
			 * can be migrated. Otherwise, fail the isolation.
			 */
			if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) {
				int order;
				unsigned long outer_pfn;
				int page_mt = get_pageblock_migratetype(page);
				bool isolate_page = !is_migrate_isolate_page(page);
				struct compact_control cc = {
					.nr_migratepages = 0,
					.order = -1,
					.zone = page_zone(pfn_to_page(head_pfn)),
					.mode = MIGRATE_SYNC,
					.ignore_skip_hint = true,
					.no_set_skip_hint = true,
					.gfp_mask = gfp_flags,
					.alloc_contig = true,
				};
				INIT_LIST_HEAD(&cc.migratepages);

				/*
				 * XXX: mark the page as MIGRATE_ISOLATE so that
				 * no one else can grab the freed page after migration.
				 * Ideally, the page should be freed as two separate
				 * pages to be added into separate migratetype free
				 * lists.
				 */
				if (isolate_page) {
					ret = set_migratetype_isolate(page, page_mt,
							flags, head_pfn, head_pfn + nr_pages);
					if (ret)
						goto failed;
				}

				ret = __alloc_contig_migrate_range(&cc, head_pfn,
							head_pfn + nr_pages);

				/*
				 * restore the page's migratetype so that it can
				 * be split into separate migratetype free lists
				 * later.
				 */
				if (isolate_page)
					unset_migratetype_isolate(page, page_mt);

				if (ret)
					goto failed;
				/*
				 * reset pfn to the head of the free page, so
				 * that the free page handling code above can split
				 * the free page to the right migratetype list.
				 *
				 * head_pfn is not used here as a hugetlb page order
				 * can be bigger than MAX_ORDER, but after it is
				 * freed, the free page order is not. Use pfn within
				 * the range to find the head of the free page.
				 */
				order = 0;
				outer_pfn = pfn;
				while (!PageBuddy(pfn_to_page(outer_pfn))) {
					/* stop if we cannot find the free page */
					if (++order > MAX_ORDER)
						goto failed;
					outer_pfn &= ~0UL << order;
				}
				pfn = outer_pfn;
				continue;
			} else
#endif
				goto failed;
		}

		pfn++;
	}
	return 0;
failed:
	/* restore the original migratetype */
	if (!skip_isolation)
		unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype);
	return -EBUSY;
}

/**
 * start_isolate_page_range() - mark page range MIGRATE_ISOLATE
 * @start_pfn:		The first PFN of the range to be isolated.
 * @end_pfn:		The first PFN *after* the range to be isolated.
 * @migratetype:	Migrate type to set in error recovery.
 * @flags:		The following flags are allowed (they can be combined in
 *			a bit mask)
 *			MEMORY_OFFLINE - isolate to offline (!allocate) memory
 *					 e.g., skip over PageHWPoison() pages
 *					 and PageOffline() pages.
 *			REPORT_FAILURE - report details about the failure to
 *			isolate the range
 * @gfp_flags:		GFP flags used for migrating pages that sit across the
 *			range boundaries.
 *
 * Making the page-allocation-type MIGRATE_ISOLATE means free pages in
 * the range will never be allocated. Any free pages and pages freed in the
 * future will not be allocated again. If the specified range includes migrate
 * types other than MOVABLE or CMA, this will fail with -EBUSY. To isolate all
 * pages in the range in the end, the caller has to free all pages in the
 * range. test_pages_isolated() can be used to test it.
 *
 * The function first tries to isolate the pageblocks at the beginning and end
 * of the range, since there might be pages across the range boundaries.
 * Afterwards, it isolates the rest of the range.
 *
 * There is no high level synchronization mechanism that prevents two threads
 * from trying to isolate overlapping ranges. If this happens, one thread
 * will notice pageblocks in the overlapping range already set to isolate.
 * This happens in set_migratetype_isolate, and set_migratetype_isolate
 * returns an error. We then clean up by restoring the migration type on
 * pageblocks we may have modified and return -EBUSY to the caller. This
 * prevents two threads from simultaneously working on overlapping ranges.
 *
 * Please note that there is no strong synchronization with the page allocator
 * either. Pages might be freed while their page blocks are marked ISOLATED.
 * A call to drain_all_pages() after isolation can flush most of them. However
 * in some cases pages might still end up on pcp lists and that would allow
 * for their allocation even when they are in fact isolated already. Depending
 * on how strong of a guarantee the caller needs, zone_pcp_disable/enable()
 * might be used to flush and disable pcplist before isolation and enable after
 * unisolation.
 *
 * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
 */
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			     int migratetype, int flags, gfp_t gfp_flags)
{
	unsigned long pfn;
	struct page *page;
	/* isolation is done at page block granularity */
	unsigned long isolate_start = pageblock_start_pfn(start_pfn);
	unsigned long isolate_end = pageblock_align(end_pfn);
	int ret;
	bool skip_isolation = false;

	/* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */
	ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false,
			skip_isolation, migratetype);
	if (ret)
		return ret;

	if (isolate_start == isolate_end - pageblock_nr_pages)
		skip_isolation = true;

	/* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */
	ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true,
			skip_isolation, migratetype);
	if (ret) {
		unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
		return ret;
	}

	/* skip isolated pageblocks at the beginning and end */
	for (pfn = isolate_start + pageblock_nr_pages;
	     pfn < isolate_end - pageblock_nr_pages;
	     pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page && set_migratetype_isolate(page, migratetype, flags,
					start_pfn, end_pfn)) {
			undo_isolate_page_range(isolate_start, pfn, migratetype);
			unset_migratetype_isolate(
				pfn_to_page(isolate_end - pageblock_nr_pages),
				migratetype);
			return -EBUSY;
		}
	}
	return 0;
}
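
/*
 * A minimal, hypothetical sketch of how a caller typically drives this API
 * (compare alloc_contig_range() and offline_pages(); isolate_range_example()
 * and its simplified error handling are illustrative assumptions, not kernel
 * code). In real callers, the pages in the range are migrated or freed
 * between the isolate and test steps.
 */
static int __maybe_unused isolate_range_example(unsigned long start_pfn,
						unsigned long end_pfn)
{
	int ret;

	/* mark all pageblocks covering the range MIGRATE_ISOLATE */
	ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE,
				       0, GFP_KERNEL);
	if (ret)
		return ret;

	/* flush pcp lists so stray free pages reach the isolate freelists */
	drain_all_pages(page_zone(pfn_to_page(start_pfn)));

	/* after migrating/freeing the pages, verify the range is all free */
	ret = test_pages_isolated(start_pfn, end_pfn, 0);

	/* either way, restore the original migratetype when done */
	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
	return ret;
}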

/**
 * undo_isolate_page_range - undo effects of start_isolate_page_range()
 * @start_pfn:		The first PFN of the isolated range
 * @end_pfn:		The first PFN *after* the isolated range
 * @migratetype:	New migrate type to set on the range
 *
 * This finds every MIGRATE_ISOLATE page block in the given range
 * and switches it to @migratetype.
 */
void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			     int migratetype)
{
	unsigned long pfn;
	struct page *page;
	unsigned long isolate_start = pageblock_start_pfn(start_pfn);
	unsigned long isolate_end = pageblock_align(end_pfn);

	for (pfn = isolate_start;
	     pfn < isolate_end;
	     pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (!page || !is_migrate_isolate_page(page))
			continue;
		unset_migratetype_isolate(page, migratetype);
	}
}

/*
 * Test whether all pages in the range are free (i.e., isolated).
 * All pages in [start_pfn, end_pfn) must be in the same zone.
 * zone->lock must be held before calling this.
 *
 * Returns the last tested pfn.
 */
static unsigned long
__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
				  int flags)
{
	struct page *page;

	while (pfn < end_pfn) {
		page = pfn_to_page(pfn);
		if (PageBuddy(page))
			/*
			 * If the page is on a free list, it has to be on
			 * the correct MIGRATE_ISOLATE freelist. There is no
			 * simple way to verify that as VM_BUG_ON(), though.
			 */
			pfn += 1 << buddy_order(page);
		else if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
			/* A HWPoisoned page cannot also be PageBuddy */
			pfn++;
		else if ((flags & MEMORY_OFFLINE) && PageOffline(page) &&
			 !page_count(page))
			/*
			 * The responsible driver agreed to skip PageOffline()
			 * pages when offlining memory by dropping its
			 * reference in MEM_GOING_OFFLINE.
			 */
			pfn++;
		else
			break;
	}

	return pfn;
}

/**
 * test_pages_isolated - check if pageblocks in range are isolated
 * @start_pfn:		The first PFN of the isolated range
 * @end_pfn:		The first PFN *after* the isolated range
 * @isol_flags:		Testing mode flags
 *
 * This tests if all pages in the specified range are free.
 *
 * If %MEMORY_OFFLINE is specified in @isol_flags, it will consider
 * poisoned and offlined pages free as well.
 *
 * Caller must ensure the requested range doesn't span zones.
 *
 * Returns 0 if true, -EBUSY if one or more pages are in use.
 */
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
			int isol_flags)
{
	unsigned long pfn, flags;
	struct page *page;
	struct zone *zone;
	int ret;

	/*
	 * Note: pageblock_nr_pages != MAX_ORDER_NR_PAGES, so chunks of free
	 * pages are not necessarily aligned to pageblock_nr_pages. Just
	 * check the migratetype of each pageblock first.
	 */
	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page && !is_migrate_isolate_page(page))
			break;
	}
	page = __first_valid_page(start_pfn, end_pfn - start_pfn);
	if ((pfn < end_pfn) || !page) {
		ret = -EBUSY;
		goto out;
	}

	/* Check all pages are free or marked as ISOLATED */
	zone = page_zone(page);
	spin_lock_irqsave(&zone->lock, flags);
	pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, isol_flags);
	spin_unlock_irqrestore(&zone->lock, flags);

	ret = pfn < end_pfn ? -EBUSY : 0;

out:
	trace_test_pages_isolated(start_pfn, end_pfn, pfn);

	return ret;
}