1 | /* SPDX-License-Identifier: GPL-2.0 |
2 | * |
3 | * page_pool/helpers.h |
4 | * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com> |
5 | * Copyright (C) 2016 Red Hat, Inc. |
6 | */ |
7 | |
8 | /** |
9 | * DOC: page_pool allocator |
10 | * |
11 | * The page_pool allocator is optimized for recycling page or page fragment used |
12 | * by skb packet and xdp frame. |
13 | * |
14 | * Basic use involves replacing any alloc_pages() calls with page_pool_alloc(), |
15 | * which allocate memory with or without page splitting depending on the |
16 | * requested memory size. |
17 | * |
18 | * If the driver knows that it always requires full pages or its allocations are |
19 | * always smaller than half a page, it can use one of the more specific API |
20 | * calls: |
21 | * |
22 | * 1. page_pool_alloc_pages(): allocate memory without page splitting when |
23 | * driver knows that the memory it need is always bigger than half of the page |
24 | * allocated from page pool. There is no cache line dirtying for 'struct page' |
25 | * when a page is recycled back to the page pool. |
26 | * |
27 | * 2. page_pool_alloc_frag(): allocate memory with page splitting when driver |
28 | * knows that the memory it need is always smaller than or equal to half of the |
29 | * page allocated from page pool. Page splitting enables memory saving and thus |
30 | * avoids TLB/cache miss for data access, but there also is some cost to |
31 | * implement page splitting, mainly some cache line dirtying/bouncing for |
32 | * 'struct page' and atomic operation for page->pp_ref_count. |
33 | * |
34 | * The API keeps track of in-flight pages, in order to let API users know when |
35 | * it is safe to free a page_pool object, the API users must call |
36 | * page_pool_put_page() or page_pool_free_va() to free the page_pool object, or |
37 | * attach the page_pool object to a page_pool-aware object like skbs marked with |
38 | * skb_mark_for_recycle(). |
39 | * |
40 | * page_pool_put_page() may be called multiple times on the same page if a page |
41 | * is split into multiple fragments. For the last fragment, it will either |
42 | * recycle the page, or in case of page->_refcount > 1, it will release the DMA |
43 | * mapping and in-flight state accounting. |
44 | * |
45 | * dma_sync_single_range_for_device() is only called for the last fragment when |
46 | * page_pool is created with PP_FLAG_DMA_SYNC_DEV flag, so it depends on the |
47 | * last freed fragment to do the sync_for_device operation for all fragments in |
48 | * the same page when a page is split. The API user must setup pool->p.max_len |
49 | * and pool->p.offset correctly and ensure that page_pool_put_page() is called |
50 | * with dma_sync_size being -1 for fragment API. |
51 | */ |
52 | #ifndef _NET_PAGE_POOL_HELPERS_H |
53 | #define _NET_PAGE_POOL_HELPERS_H |
54 | |
55 | #include <net/page_pool/types.h> |
56 | |
57 | #ifdef CONFIG_PAGE_POOL_STATS |
58 | /* Deprecated driver-facing API, use netlink instead */ |
59 | int page_pool_ethtool_stats_get_count(void); |
60 | u8 *page_pool_ethtool_stats_get_strings(u8 *data); |
61 | u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); |
62 | |
63 | bool page_pool_get_stats(const struct page_pool *pool, |
64 | struct page_pool_stats *stats); |
65 | #else |
66 | static inline int page_pool_ethtool_stats_get_count(void) |
67 | { |
68 | return 0; |
69 | } |
70 | |
71 | static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data) |
72 | { |
73 | return data; |
74 | } |
75 | |
76 | static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) |
77 | { |
78 | return data; |
79 | } |
80 | #endif |
81 | |
82 | /** |
83 | * page_pool_dev_alloc_pages() - allocate a page. |
84 | * @pool: pool from which to allocate |
85 | * |
86 | * Get a page from the page allocator or page_pool caches. |
87 | */ |
88 | static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) |
89 | { |
90 | gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); |
91 | |
92 | return page_pool_alloc_pages(pool, gfp); |
93 | } |
94 | |
95 | /** |
96 | * page_pool_dev_alloc_frag() - allocate a page fragment. |
97 | * @pool: pool from which to allocate |
98 | * @offset: offset to the allocated page |
99 | * @size: requested size |
100 | * |
101 | * Get a page fragment from the page allocator or page_pool caches. |
102 | * |
103 | * Return: |
104 | * Return allocated page fragment, otherwise return NULL. |
105 | */ |
106 | static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, |
107 | unsigned int *offset, |
108 | unsigned int size) |
109 | { |
110 | gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); |
111 | |
112 | return page_pool_alloc_frag(pool, offset, size, gfp); |
113 | } |
114 | |
115 | static inline struct page *page_pool_alloc(struct page_pool *pool, |
116 | unsigned int *offset, |
117 | unsigned int *size, gfp_t gfp) |
118 | { |
119 | unsigned int max_size = PAGE_SIZE << pool->p.order; |
120 | struct page *page; |
121 | |
122 | if ((*size << 1) > max_size) { |
123 | *size = max_size; |
124 | *offset = 0; |
125 | return page_pool_alloc_pages(pool, gfp); |
126 | } |
127 | |
128 | page = page_pool_alloc_frag(pool, offset, size: *size, gfp); |
129 | if (unlikely(!page)) |
130 | return NULL; |
131 | |
132 | /* There is very likely not enough space for another fragment, so append |
133 | * the remaining size to the current fragment to avoid truesize |
134 | * underestimate problem. |
135 | */ |
136 | if (pool->frag_offset + *size > max_size) { |
137 | *size = max_size - *offset; |
138 | pool->frag_offset = max_size; |
139 | } |
140 | |
141 | return page; |
142 | } |
143 | |
144 | /** |
145 | * page_pool_dev_alloc() - allocate a page or a page fragment. |
146 | * @pool: pool from which to allocate |
147 | * @offset: offset to the allocated page |
148 | * @size: in as the requested size, out as the allocated size |
149 | * |
150 | * Get a page or a page fragment from the page allocator or page_pool caches |
151 | * depending on the requested size in order to allocate memory with least memory |
152 | * utilization and performance penalty. |
153 | * |
154 | * Return: |
155 | * Return allocated page or page fragment, otherwise return NULL. |
156 | */ |
157 | static inline struct page *page_pool_dev_alloc(struct page_pool *pool, |
158 | unsigned int *offset, |
159 | unsigned int *size) |
160 | { |
161 | gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); |
162 | |
163 | return page_pool_alloc(pool, offset, size, gfp); |
164 | } |
165 | |
166 | static inline void *page_pool_alloc_va(struct page_pool *pool, |
167 | unsigned int *size, gfp_t gfp) |
168 | { |
169 | unsigned int offset; |
170 | struct page *page; |
171 | |
172 | /* Mask off __GFP_HIGHMEM to ensure we can use page_address() */ |
173 | page = page_pool_alloc(pool, offset: &offset, size, gfp: gfp & ~__GFP_HIGHMEM); |
174 | if (unlikely(!page)) |
175 | return NULL; |
176 | |
177 | return page_address(page) + offset; |
178 | } |
179 | |
180 | /** |
181 | * page_pool_dev_alloc_va() - allocate a page or a page fragment and return its |
182 | * va. |
183 | * @pool: pool from which to allocate |
184 | * @size: in as the requested size, out as the allocated size |
185 | * |
186 | * This is just a thin wrapper around the page_pool_alloc() API, and |
187 | * it returns va of the allocated page or page fragment. |
188 | * |
189 | * Return: |
190 | * Return the va for the allocated page or page fragment, otherwise return NULL. |
191 | */ |
192 | static inline void *page_pool_dev_alloc_va(struct page_pool *pool, |
193 | unsigned int *size) |
194 | { |
195 | gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); |
196 | |
197 | return page_pool_alloc_va(pool, size, gfp); |
198 | } |
199 | |
200 | /** |
201 | * page_pool_get_dma_dir() - Retrieve the stored DMA direction. |
202 | * @pool: pool from which page was allocated |
203 | * |
204 | * Get the stored dma direction. A driver might decide to store this locally |
205 | * and avoid the extra cache line from page_pool to determine the direction. |
206 | */ |
207 | static |
208 | inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) |
209 | { |
210 | return pool->p.dma_dir; |
211 | } |
212 | |
213 | /** |
214 | * page_pool_fragment_page() - split a fresh page into fragments |
215 | * @page: page to split |
216 | * @nr: references to set |
217 | * |
218 | * pp_ref_count represents the number of outstanding references to the page, |
219 | * which will be freed using page_pool APIs (rather than page allocator APIs |
220 | * like put_page()). Such references are usually held by page_pool-aware |
221 | * objects like skbs marked for page pool recycling. |
222 | * |
223 | * This helper allows the caller to take (set) multiple references to a |
224 | * freshly allocated page. The page must be freshly allocated (have a |
225 | * pp_ref_count of 1). This is commonly done by drivers and |
226 | * "fragment allocators" to save atomic operations - either when they know |
227 | * upfront how many references they will need; or to take MAX references and |
228 | * return the unused ones with a single atomic dec(), instead of performing |
229 | * multiple atomic inc() operations. |
230 | */ |
231 | static inline void page_pool_fragment_page(struct page *page, long nr) |
232 | { |
233 | atomic_long_set(v: &page->pp_ref_count, i: nr); |
234 | } |
235 | |
236 | static inline long page_pool_unref_page(struct page *page, long nr) |
237 | { |
238 | long ret; |
239 | |
240 | /* If nr == pp_ref_count then we have cleared all remaining |
241 | * references to the page: |
242 | * 1. 'n == 1': no need to actually overwrite it. |
243 | * 2. 'n != 1': overwrite it with one, which is the rare case |
244 | * for pp_ref_count draining. |
245 | * |
246 | * The main advantage to doing this is that not only we avoid a atomic |
247 | * update, as an atomic_read is generally a much cheaper operation than |
248 | * an atomic update, especially when dealing with a page that may be |
249 | * referenced by only 2 or 3 users; but also unify the pp_ref_count |
250 | * handling by ensuring all pages have partitioned into only 1 piece |
251 | * initially, and only overwrite it when the page is partitioned into |
252 | * more than one piece. |
253 | */ |
254 | if (atomic_long_read(v: &page->pp_ref_count) == nr) { |
255 | /* As we have ensured nr is always one for constant case using |
256 | * the BUILD_BUG_ON(), only need to handle the non-constant case |
257 | * here for pp_ref_count draining, which is a rare case. |
258 | */ |
259 | BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1); |
260 | if (!__builtin_constant_p(nr)) |
261 | atomic_long_set(v: &page->pp_ref_count, i: 1); |
262 | |
263 | return 0; |
264 | } |
265 | |
266 | ret = atomic_long_sub_return(i: nr, v: &page->pp_ref_count); |
267 | WARN_ON(ret < 0); |
268 | |
269 | /* We are the last user here too, reset pp_ref_count back to 1 to |
270 | * ensure all pages have been partitioned into 1 piece initially, |
271 | * this should be the rare case when the last two fragment users call |
272 | * page_pool_unref_page() currently. |
273 | */ |
274 | if (unlikely(!ret)) |
275 | atomic_long_set(v: &page->pp_ref_count, i: 1); |
276 | |
277 | return ret; |
278 | } |
279 | |
280 | static inline void page_pool_ref_page(struct page *page) |
281 | { |
282 | atomic_long_inc(v: &page->pp_ref_count); |
283 | } |
284 | |
285 | static inline bool page_pool_is_last_ref(struct page *page) |
286 | { |
287 | /* If page_pool_unref_page() returns 0, we were the last user */ |
288 | return page_pool_unref_page(page, nr: 1) == 0; |
289 | } |
290 | |
291 | /** |
292 | * page_pool_put_page() - release a reference to a page pool page |
293 | * @pool: pool from which page was allocated |
294 | * @page: page to release a reference on |
295 | * @dma_sync_size: how much of the page may have been touched by the device |
296 | * @allow_direct: released by the consumer, allow lockless caching |
297 | * |
298 | * The outcome of this depends on the page refcnt. If the driver bumps |
299 | * the refcnt > 1 this will unmap the page. If the page refcnt is 1 |
300 | * the allocator owns the page and will try to recycle it in one of the pool |
301 | * caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will be synced for_device |
302 | * using dma_sync_single_range_for_device(). |
303 | */ |
304 | static inline void page_pool_put_page(struct page_pool *pool, |
305 | struct page *page, |
306 | unsigned int dma_sync_size, |
307 | bool allow_direct) |
308 | { |
309 | /* When page_pool isn't compiled-in, net/core/xdp.c doesn't |
310 | * allow registering MEM_TYPE_PAGE_POOL, but shield linker. |
311 | */ |
312 | #ifdef CONFIG_PAGE_POOL |
313 | if (!page_pool_is_last_ref(page)) |
314 | return; |
315 | |
316 | page_pool_put_unrefed_page(pool, page, dma_sync_size, allow_direct); |
317 | #endif |
318 | } |
319 | |
320 | /** |
321 | * page_pool_put_full_page() - release a reference on a page pool page |
322 | * @pool: pool from which page was allocated |
323 | * @page: page to release a reference on |
324 | * @allow_direct: released by the consumer, allow lockless caching |
325 | * |
326 | * Similar to page_pool_put_page(), but will DMA sync the entire memory area |
327 | * as configured in &page_pool_params.max_len. |
328 | */ |
329 | static inline void page_pool_put_full_page(struct page_pool *pool, |
330 | struct page *page, bool allow_direct) |
331 | { |
332 | page_pool_put_page(pool, page, dma_sync_size: -1, allow_direct); |
333 | } |
334 | |
335 | /** |
336 | * page_pool_recycle_direct() - release a reference on a page pool page |
337 | * @pool: pool from which page was allocated |
338 | * @page: page to release a reference on |
339 | * |
340 | * Similar to page_pool_put_full_page() but caller must guarantee safe context |
341 | * (e.g NAPI), since it will recycle the page directly into the pool fast cache. |
342 | */ |
343 | static inline void page_pool_recycle_direct(struct page_pool *pool, |
344 | struct page *page) |
345 | { |
346 | page_pool_put_full_page(pool, page, allow_direct: true); |
347 | } |
348 | |
349 | #define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \ |
350 | (sizeof(dma_addr_t) > sizeof(unsigned long)) |
351 | |
352 | /** |
353 | * page_pool_free_va() - free a va into the page_pool |
354 | * @pool: pool from which va was allocated |
355 | * @va: va to be freed |
356 | * @allow_direct: freed by the consumer, allow lockless caching |
357 | * |
358 | * Free a va allocated from page_pool_allo_va(). |
359 | */ |
360 | static inline void page_pool_free_va(struct page_pool *pool, void *va, |
361 | bool allow_direct) |
362 | { |
363 | page_pool_put_page(pool, page: virt_to_head_page(x: va), dma_sync_size: -1, allow_direct); |
364 | } |
365 | |
366 | /** |
367 | * page_pool_get_dma_addr() - Retrieve the stored DMA address. |
368 | * @page: page allocated from a page pool |
369 | * |
370 | * Fetch the DMA address of the page. The page pool to which the page belongs |
371 | * must had been created with PP_FLAG_DMA_MAP. |
372 | */ |
373 | static inline dma_addr_t page_pool_get_dma_addr(struct page *page) |
374 | { |
375 | dma_addr_t ret = page->dma_addr; |
376 | |
377 | if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) |
378 | ret <<= PAGE_SHIFT; |
379 | |
380 | return ret; |
381 | } |
382 | |
383 | static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) |
384 | { |
385 | if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) { |
386 | page->dma_addr = addr >> PAGE_SHIFT; |
387 | |
388 | /* We assume page alignment to shave off bottom bits, |
389 | * if this "compression" doesn't work we need to drop. |
390 | */ |
391 | return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT; |
392 | } |
393 | |
394 | page->dma_addr = addr; |
395 | return false; |
396 | } |
397 | |
398 | static inline bool page_pool_put(struct page_pool *pool) |
399 | { |
400 | return refcount_dec_and_test(r: &pool->user_cnt); |
401 | } |
402 | |
403 | static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) |
404 | { |
405 | if (unlikely(pool->p.nid != new_nid)) |
406 | page_pool_update_nid(pool, new_nid); |
407 | } |
408 | |
409 | #endif /* _NET_PAGE_POOL_HELPERS_H */ |
410 | |