// SPDX-License-Identifier: GPL-2.0
#include <linux/memblock.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/huge_mm.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/hugetlb.h>
#include <linux/memremap.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/kernel-page-flags.h>
#include <linux/uaccess.h>
#include "internal.h"

#define KPMSIZE sizeof(u64)
#define KPMMASK (KPMSIZE - 1)
#define KPMBITS (KPMSIZE * BITS_PER_BYTE)
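
/*
 * Each of the files below is a flat, pfn-indexed array of u64 entries:
 * the entry for pfn N starts at byte offset N * KPMSIZE, so readers seek
 * to pfn * 8 and read 8 bytes. Reads must be KPMSIZE-aligned in both
 * offset and length (see the KPMMASK checks in the read handlers).
 */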

static inline unsigned long get_max_dump_pfn(void)
{
#ifdef CONFIG_SPARSEMEM
	/*
	 * The memmap of early sections is completely populated and marked
	 * online even if max_pfn does not fall on a section boundary -
	 * pfn_to_online_page() will succeed on all pages. Allow inspecting
	 * these memmaps.
	 */
	return round_up(max_pfn, PAGES_PER_SECTION);
#else
	return max_pfn;
#endif
}

/* /proc/kpagecount - an array exposing page counts
 *
 * Each entry is a u64 representing the corresponding
 * physical page count.
 */
static ssize_t kpagecount_read(struct file *file, char __user *buf,
			       size_t count, loff_t *ppos)
{
	const unsigned long max_dump_pfn = get_max_dump_pfn();
	u64 __user *out = (u64 __user *)buf;
	struct page *ppage;
	unsigned long src = *ppos;
	unsigned long pfn;
	ssize_t ret = 0;
	u64 pcount;

	pfn = src / KPMSIZE;
	if (src & KPMMASK || count & KPMMASK)
		return -EINVAL;
	if (src >= max_dump_pfn * KPMSIZE)
		return 0;
	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);

	while (count > 0) {
		/*
		 * TODO: ZONE_DEVICE support requires identifying
		 * memmaps that were actually initialized.
		 */
		ppage = pfn_to_online_page(pfn);

		if (!ppage || PageSlab(ppage) || page_has_type(ppage))
			pcount = 0;
		else
			pcount = page_mapcount(ppage);

		if (put_user(pcount, out)) {
			ret = -EFAULT;
			break;
		}

		pfn++;
		out++;
		count -= KPMSIZE;

		cond_resched();
	}

	*ppos += (char __user *)out - buf;
	if (!ret)
		ret = (char __user *)out - buf;
	return ret;
}

static const struct proc_ops kpagecount_proc_ops = {
	.proc_flags	= PROC_ENTRY_PERMANENT,
	.proc_lseek	= mem_lseek,
	.proc_read	= kpagecount_read,
};
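
/*
 * Example (userspace, not kernel code): a minimal sketch of how a tool
 * might read the map count for one pfn from /proc/kpagecount. The helper
 * name read_kpagecount() is illustrative and not part of any kernel API.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *
 *	static int read_kpagecount(unsigned long pfn, uint64_t *count)
 *	{
 *		int fd = open("/proc/kpagecount", O_RDONLY);
 *		ssize_t n;
 *
 *		if (fd < 0)
 *			return -1;
 *		n = pread(fd, count, sizeof(*count), pfn * sizeof(*count));
 *		close(fd);
 *		return n == (ssize_t)sizeof(*count) ? 0 : -1;
 *	}
 */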

/* /proc/kpageflags - an array exposing page flags
 *
 * Each entry is a u64 representing the corresponding
 * physical page flags.
 */

static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
{
	return ((kflags >> kbit) & 1) << ubit;
}

u64 stable_page_flags(struct page *page)
{
	u64 k;
	u64 u;

	/*
	 * pseudo flag: KPF_NOPAGE
	 * it differentiates a memory hole from a page with no flags
	 */
	if (!page)
		return 1 << KPF_NOPAGE;

	k = page->flags;
	u = 0;

	/*
	 * pseudo flags for the well known (anonymous) memory mapped pages
	 *
	 * Note that page->_mapcount is overloaded in SLAB, so the
	 * simple test in page_mapped() is not enough.
	 */
	if (!PageSlab(page) && page_mapped(page))
		u |= 1 << KPF_MMAP;
	if (PageAnon(page))
		u |= 1 << KPF_ANON;
	if (PageKsm(page))
		u |= 1 << KPF_KSM;

	/*
	 * compound pages: export both head/tail info
	 * they together define a compound page's start/end pos and order
	 */
	if (PageHead(page))
		u |= 1 << KPF_COMPOUND_HEAD;
	if (PageTail(page))
		u |= 1 << KPF_COMPOUND_TAIL;
	if (PageHuge(page))
		u |= 1 << KPF_HUGE;
	/*
	 * PageTransCompound can be true for non-huge compound pages (slab
	 * pages or pages allocated by drivers with __GFP_COMP) because it
	 * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
	 * to make sure a given page is a thp, not a non-huge compound page.
	 */
	else if (PageTransCompound(page)) {
		struct page *head = compound_head(page);

		if (PageLRU(head) || PageAnon(head))
			u |= 1 << KPF_THP;
		else if (is_huge_zero_page(head)) {
			u |= 1 << KPF_ZERO_PAGE;
			u |= 1 << KPF_THP;
		}
	} else if (is_zero_pfn(page_to_pfn(page)))
		u |= 1 << KPF_ZERO_PAGE;

	/*
	 * Caveats on high order pages: PG_buddy and PG_slab will only be set
	 * on the head page.
	 */
	if (PageBuddy(page))
		u |= 1 << KPF_BUDDY;
	else if (page_count(page) == 0 && is_free_buddy_page(page))
		u |= 1 << KPF_BUDDY;

	if (PageOffline(page))
		u |= 1 << KPF_OFFLINE;
	if (PageTable(page))
		u |= 1 << KPF_PGTABLE;

	if (page_is_idle(page))
		u |= 1 << KPF_IDLE;

	u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);

	u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
	if (PageTail(page) && PageSlab(page))
		u |= 1 << KPF_SLAB;

	u |= kpf_copy_bit(k, KPF_ERROR, PG_error);
	u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
	u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate);
	u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback);

	u |= kpf_copy_bit(k, KPF_LRU, PG_lru);
	u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced);
	u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active);
	u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim);

	if (PageSwapCache(page))
		u |= 1 << KPF_SWAPCACHE;
	u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked);

	u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable);
	u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked);

#ifdef CONFIG_MEMORY_FAILURE
	u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
#endif

#ifdef CONFIG_ARCH_USES_PG_UNCACHED
	u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached);
#endif

	u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved);
	u |= kpf_copy_bit(k, KPF_MAPPEDTODISK, PG_mappedtodisk);
	u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private);
	u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2);
	u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1);
	u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1);
#ifdef CONFIG_ARCH_USES_PG_ARCH_X
	u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2);
	u |= kpf_copy_bit(k, KPF_ARCH_3, PG_arch_3);
#endif

	return u;
}
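
/*
 * Example (userspace, not kernel code): a sketch of testing one bit in a
 * /proc/kpageflags entry. The bit numbers come from the uapi header
 * <linux/kernel-page-flags.h>, so the named constants are used instead of
 * hard-coded values; is_thp_pfn() is an illustrative helper, not an API.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *	#include <linux/kernel-page-flags.h>
 *
 *	static int is_thp_pfn(unsigned long pfn)
 *	{
 *		uint64_t flags;
 *		int fd = open("/proc/kpageflags", O_RDONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (pread(fd, &flags, sizeof(flags), pfn * sizeof(flags)) !=
 *		    (ssize_t)sizeof(flags))
 *			flags = 1ULL << KPF_NOPAGE;
 *		close(fd);
 *		return !!(flags & (1ULL << KPF_THP));
 *	}
 */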

static ssize_t kpageflags_read(struct file *file, char __user *buf,
			       size_t count, loff_t *ppos)
{
	const unsigned long max_dump_pfn = get_max_dump_pfn();
	u64 __user *out = (u64 __user *)buf;
	struct page *ppage;
	unsigned long src = *ppos;
	unsigned long pfn;
	ssize_t ret = 0;

	pfn = src / KPMSIZE;
	if (src & KPMMASK || count & KPMMASK)
		return -EINVAL;
	if (src >= max_dump_pfn * KPMSIZE)
		return 0;
	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);

	while (count > 0) {
		/*
		 * TODO: ZONE_DEVICE support requires identifying
		 * memmaps that were actually initialized.
		 */
		ppage = pfn_to_online_page(pfn);

		if (put_user(stable_page_flags(ppage), out)) {
			ret = -EFAULT;
			break;
		}

		pfn++;
		out++;
		count -= KPMSIZE;

		cond_resched();
	}

	*ppos += (char __user *)out - buf;
	if (!ret)
		ret = (char __user *)out - buf;
	return ret;
}

static const struct proc_ops kpageflags_proc_ops = {
	.proc_flags	= PROC_ENTRY_PERMANENT,
	.proc_lseek	= mem_lseek,
	.proc_read	= kpageflags_read,
};

#ifdef CONFIG_MEMCG
static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	const unsigned long max_dump_pfn = get_max_dump_pfn();
	u64 __user *out = (u64 __user *)buf;
	struct page *ppage;
	unsigned long src = *ppos;
	unsigned long pfn;
	ssize_t ret = 0;
	u64 ino;

	pfn = src / KPMSIZE;
	if (src & KPMMASK || count & KPMMASK)
		return -EINVAL;
	if (src >= max_dump_pfn * KPMSIZE)
		return 0;
	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);

	while (count > 0) {
		/*
		 * TODO: ZONE_DEVICE support requires identifying
		 * memmaps that were actually initialized.
		 */
		ppage = pfn_to_online_page(pfn);

		if (ppage)
			ino = page_cgroup_ino(ppage);
		else
			ino = 0;

		if (put_user(ino, out)) {
			ret = -EFAULT;
			break;
		}

		pfn++;
		out++;
		count -= KPMSIZE;

		cond_resched();
	}

	*ppos += (char __user *)out - buf;
	if (!ret)
		ret = (char __user *)out - buf;
	return ret;
}

static const struct proc_ops kpagecgroup_proc_ops = {
	.proc_flags	= PROC_ENTRY_PERMANENT,
	.proc_lseek	= mem_lseek,
	.proc_read	= kpagecgroup_read,
};
#endif /* CONFIG_MEMCG */
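
/*
 * Example (userspace, not kernel code): /proc/kpagecgroup yields the inode
 * number of the memory cgroup each page is charged to, and is typically
 * combined with /proc/<pid>/pagemap, which translates a virtual address to
 * a pfn (bits 0-54 of each pagemap entry, valid when the "present" bit 63
 * is set; unprivileged readers may see the pfn field as zero). The helper
 * vaddr_to_pfn() below is an illustrative sketch, not an API.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	static int vaddr_to_pfn(pid_t pid, unsigned long vaddr,
 *				unsigned long *pfn)
 *	{
 *		char path[64];
 *		uint64_t ent;
 *		long psize = sysconf(_SC_PAGESIZE);
 *		int fd;
 *
 *		snprintf(path, sizeof(path), "/proc/%d/pagemap", (int)pid);
 *		fd = open(path, O_RDONLY);
 *		if (fd < 0)
 *			return -1;
 *		if (pread(fd, &ent, sizeof(ent),
 *			  (vaddr / psize) * sizeof(ent)) !=
 *		    (ssize_t)sizeof(ent) || !(ent & (1ULL << 63))) {
 *			close(fd);
 *			return -1;
 *		}
 *		close(fd);
 *		*pfn = ent & ((1ULL << 55) - 1);
 *		return 0;
 *	}
 */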

static int __init proc_page_init(void)
{
	proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops);
	proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops);
#ifdef CONFIG_MEMCG
	proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops);
#endif
	return 0;
}
fs_initcall(proc_page_init);