kernel: add missing optimization for page pool
openwrt/openwrt.git: target/linux/generic/backport-5.10/630-v5.15-page_pool_frag_support.patch
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -45,7 +45,10 @@
  * Please note DMA-sync-for-CPU is still
  * device driver responsibility
  */
-#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
+#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
+#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
+                     PP_FLAG_DMA_SYNC_DEV |\
+                     PP_FLAG_PAGE_FRAG)
 
 /*
  * Fast allocation side cache array/stack
@@ -65,7 +68,7 @@
 #define PP_ALLOC_CACHE_REFILL 64
 struct pp_alloc_cache {
         u32 count;
-        void *cache[PP_ALLOC_CACHE_SIZE];
+        struct page *cache[PP_ALLOC_CACHE_SIZE];
 };
 
 struct page_pool_params {
@@ -79,6 +82,22 @@ struct page_pool_params {
         unsigned int offset; /* DMA addr offset */
 };
 
+
+static inline int page_pool_ethtool_stats_get_count(void)
+{
+        return 0;
+}
+
+static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data)
+{
+        return data;
+}
+
+static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
+{
+        return data;
+}
+
 struct page_pool {
         struct page_pool_params p;
 
@@ -88,6 +107,9 @@ struct page_pool {
         unsigned long defer_warn;
 
         u32 pages_state_hold_cnt;
+        unsigned int frag_offset;
+        struct page *frag_page;
+        long frag_users;
 
         /*
          * Data structure for allocation side
@@ -137,6 +159,18 @@ static inline struct page *page_pool_dev
         return page_pool_alloc_pages(pool, gfp);
 }
 
+struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
+                                  unsigned int size, gfp_t gfp);
+
+static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
+                                                    unsigned int *offset,
+                                                    unsigned int size)
+{
+        gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+        return page_pool_alloc_frag(pool, offset, size, gfp);
+}
+
 /* get the stored dma direction. A driver might decide to treat this locally and
  * avoid the extra cache line from page_pool to determine the direction
  */
@@ -146,6 +180,8 @@ inline enum dma_data_direction page_pool
         return pool->p.dma_dir;
 }
 
+bool page_pool_return_skb_page(struct page *page);
+
 struct page_pool *page_pool_create(const struct page_pool_params *params);
 
 #ifdef CONFIG_PAGE_POOL
@@ -165,6 +201,7 @@ static inline void page_pool_release_pag
                                           struct page *page)
 {
 }
+
 #endif
 
 void page_pool_put_page(struct page_pool *pool, struct page *page,
@@ -189,19 +226,48 @@ static inline void page_pool_recycle_dir
         page_pool_put_full_page(pool, page, true);
 }
 
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
+                (sizeof(dma_addr_t) > sizeof(unsigned long))
+
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-        dma_addr_t ret = page->dma_addr[0];
-        if (sizeof(dma_addr_t) > sizeof(unsigned long))
-                ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+        dma_addr_t ret = page->dma_addr;
+
+        if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+                ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
         return ret;
 }
 
 static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
-        page->dma_addr[0] = addr;
-        if (sizeof(dma_addr_t) > sizeof(unsigned long))
-                page->dma_addr[1] = upper_32_bits(addr);
+        page->dma_addr = addr;
+        if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+                page->dma_addr_upper = upper_32_bits(addr);
+}
+
+static inline void page_pool_set_frag_count(struct page *page, long nr)
+{
+        atomic_long_set(&page->pp_frag_count, nr);
+}
+
+static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
+                                                           long nr)
+{
+        long ret;
+
+        /* As suggested by Alexander, atomic_long_read() may cover up the
+         * reference count errors, so avoid calling atomic_long_read() in
+         * the cases of freeing or draining the page_frags, where we would
+         * not expect it to match or that are slowpath anyway.
+         */
+        if (__builtin_constant_p(nr) &&
+            atomic_long_read(&page->pp_frag_count) == nr)
+                return 0;
+
+        ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+        WARN_ON(ret < 0);
+        return ret;
 }
 
 static inline bool is_page_pool_compiled_in(void)
@@ -225,4 +291,23 @@ static inline void page_pool_nid_changed
         if (unlikely(pool->p.nid != new_nid))
                 page_pool_update_nid(pool, new_nid);
 }
+
+static inline void page_pool_ring_lock(struct page_pool *pool)
+        __acquires(&pool->ring.producer_lock)
+{
+        if (in_serving_softirq())
+                spin_lock(&pool->ring.producer_lock);
+        else
+                spin_lock_bh(&pool->ring.producer_lock);
+}
+
+static inline void page_pool_ring_unlock(struct page_pool *pool)
+        __releases(&pool->ring.producer_lock)
+{
+        if (in_serving_softirq())
+                spin_unlock(&pool->ring.producer_lock);
+        else
+                spin_unlock_bh(&pool->ring.producer_lock);
+}
+
 #endif /* _NET_PAGE_POOL_H */
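
For orientation, here is a minimal driver-side sketch of the API added above. It is illustrative only and not part of the backport; the my_* names, the 2048-byte buffer size and the pool parameters are assumptions. A pool created with PP_FLAG_PAGE_FRAG can serve several sub-page receive buffers from one underlying page via page_pool_dev_alloc_frag(), which hands back the page plus the offset of the new fragment.

/* Illustrative only -- not part of the backport. */
static struct page_pool *my_create_frag_pool(struct device *dev)
{
        struct page_pool_params pp_params = {
                .flags          = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV |
                                  PP_FLAG_PAGE_FRAG,
                .order          = 0,
                .pool_size      = 256,
                .nid            = NUMA_NO_NODE,
                .dev            = dev,
                .dma_dir        = DMA_FROM_DEVICE,
                .max_len        = PAGE_SIZE,
                .offset         = 0,
        };

        return page_pool_create(&pp_params);   /* ERR_PTR() on failure */
}

static int my_rx_refill_one(struct page_pool *pool, dma_addr_t *dma)
{
        unsigned int offset;
        struct page *page;

        /* Each successful call takes one pp_frag_count reference on the
         * shared page; it is dropped again by page_pool_put_full_page()
         * or by the skb recycling path added later in this patch.
         */
        page = page_pool_dev_alloc_frag(pool, &offset, 2048);
        if (!page)
                return -ENOMEM;

        *dma = page_pool_get_dma_addr(page) + offset;
        return 0;
}

With order-0 pages and 2048-byte buffers, two fragments share a 4 KiB page and the page only goes back to the pool once both fragments have been freed.
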
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -11,16 +11,22 @@
 #include <linux/device.h>
 
 #include <net/page_pool.h>
+#include <net/xdp.h>
+
 #include <linux/dma-direction.h>
 #include <linux/dma-mapping.h>
 #include <linux/page-flags.h>
 #include <linux/mm.h> /* for __put_page() */
+#include <linux/poison.h>
+#include <linux/ethtool.h>
 
 #include <trace/events/page_pool.h>
 
 #define DEFER_TIME (msecs_to_jiffies(1000))
 #define DEFER_WARN_INTERVAL (60 * HZ)
 
+#define BIAS_MAX LONG_MAX
+
 static int page_pool_init(struct page_pool *pool,
                           const struct page_pool_params *params)
 {
@@ -64,6 +70,10 @@ static int page_pool_init(struct page_po
                  */
         }
 
+        if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+            pool->p.flags & PP_FLAG_PAGE_FRAG)
+                return -EINVAL;
+
         if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
                 return -ENOMEM;
 
@@ -180,40 +190,10 @@ static void page_pool_dma_sync_for_devic
                                          pool->p.dma_dir);
 }
 
-/* slow path */
-noinline
-static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
-                                                 gfp_t _gfp)
+static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
 {
-        struct page *page;
-        gfp_t gfp = _gfp;
         dma_addr_t dma;
 
-        /* We could always set __GFP_COMP, and avoid this branch, as
-         * prep_new_page() can handle order-0 with __GFP_COMP.
-         */
-        if (pool->p.order)
-                gfp |= __GFP_COMP;
-
-        /* FUTURE development:
-         *
-         * Current slow-path essentially falls back to single page
-         * allocations, which doesn't improve performance. This code
-         * need bulk allocation support from the page allocator code.
-         */
-
-        /* Cache was empty, do real allocation */
-#ifdef CONFIG_NUMA
-        page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
-#else
-        page = alloc_pages(gfp, pool->p.order);
-#endif
-        if (!page)
-                return NULL;
-
-        if (!(pool->p.flags & PP_FLAG_DMA_MAP))
-                goto skip_dma_map;
-
         /* Setup DMA mapping: use 'struct page' area for storing DMA-addr
          * since dma_addr_t can be either 32 or 64 bits and does not always fit
          * into page private data (i.e 32bit cpu with 64bit DMA caps)
@@ -222,22 +202,53 @@ static struct page *__page_pool_alloc_pa
         dma = dma_map_page_attrs(pool->p.dev, page, 0,
                                  (PAGE_SIZE << pool->p.order),
                                  pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
-        if (dma_mapping_error(pool->p.dev, dma)) {
-                put_page(page);
-                return NULL;
-        }
+        if (dma_mapping_error(pool->p.dev, dma))
+                return false;
+
         page_pool_set_dma_addr(page, dma);
 
         if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
                 page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
 
-skip_dma_map:
+        return true;
+}
+
+static void page_pool_set_pp_info(struct page_pool *pool,
+                                  struct page *page)
+{
+        page->pp = pool;
+        page->pp_magic |= PP_SIGNATURE;
+}
+
+static void page_pool_clear_pp_info(struct page *page)
+{
+        page->pp_magic = 0;
+        page->pp = NULL;
+}
+
+/* slow path */
+noinline
+static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
+                                                 gfp_t gfp)
+{
+        struct page *page;
+
+        gfp |= __GFP_COMP;
+        page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
+        if (unlikely(!page))
+                return NULL;
+
+        if ((pool->p.flags & PP_FLAG_DMA_MAP) &&
+            unlikely(!page_pool_dma_map(pool, page))) {
+                put_page(page);
+                return NULL;
+        }
+
+        page_pool_set_pp_info(pool, page);
+
         /* Track how many pages are held 'in-flight' */
         pool->pages_state_hold_cnt++;
-
         trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
-
-        /* When page just alloc'ed is should/must have refcnt 1. */
         return page;
 }
 
@@ -302,10 +313,12 @@ void page_pool_release_page(struct page_
                              DMA_ATTR_SKIP_CPU_SYNC);
         page_pool_set_dma_addr(page, 0);
 skip_dma_unmap:
+        page_pool_clear_pp_info(page);
+
         /* This may be the last page returned, releasing the pool, so
          * it is not safe to reference pool afterwards.
         */
-        count = atomic_inc_return(&pool->pages_state_release_cnt);
+        count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
         trace_page_pool_state_release(pool, page, count);
 }
 EXPORT_SYMBOL(page_pool_release_page);
@@ -331,7 +344,10 @@ static bool page_pool_recycle_in_ring(st
         else
                 ret = ptr_ring_produce_bh(&pool->ring, page);
 
-        return (ret == 0) ? true : false;
+        if (!ret)
+                return true;
+
+        return false;
 }
 
 /* Only allow direct recycling in special circumstances, into the
@@ -350,46 +366,43 @@ static bool page_pool_recycle_in_cache(s
         return true;
 }
 
-/* page is NOT reusable when:
- * 1) allocated when system is under some pressure. (page_is_pfmemalloc)
- */
-static bool pool_page_reusable(struct page_pool *pool, struct page *page)
-{
-        return !page_is_pfmemalloc(page);
-}
-
 /* If the page refcnt == 1, this will try to recycle the page.
  * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
  * the configured size min(dma_sync_size, pool->max_len).
  * If the page refcnt != 1, then the page will be returned to memory
  * subsystem.
  */
-void page_pool_put_page(struct page_pool *pool, struct page *page,
-                        unsigned int dma_sync_size, bool allow_direct)
-{
+static __always_inline struct page *
+__page_pool_put_page(struct page_pool *pool, struct page *page,
+                     unsigned int dma_sync_size, bool allow_direct)
+{
+        /* It is not the last user for the page frag case */
+        if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
+            page_pool_atomic_sub_frag_count_return(page, 1))
+                return NULL;
+
         /* This allocator is optimized for the XDP mode that uses
          * one-frame-per-page, but have fallbacks that act like the
          * regular page allocator APIs.
          *
         * refcnt == 1 means page_pool owns page, and can recycle it.
+         *
+         * page is NOT reusable when allocated when system is under
+         * some pressure. (page_is_pfmemalloc)
         */
-        if (likely(page_ref_count(page) == 1 &&
-                   pool_page_reusable(pool, page))) {
+        if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) {
                 /* Read barrier done in page_ref_count / READ_ONCE */
 
                 if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
                         page_pool_dma_sync_for_device(pool, page,
                                                       dma_sync_size);
 
-                if (allow_direct && in_serving_softirq())
-                        if (page_pool_recycle_in_cache(page, pool))
-                                return;
+                if (allow_direct && in_serving_softirq() &&
+                    page_pool_recycle_in_cache(page, pool))
+                        return NULL;
 
-                if (!page_pool_recycle_in_ring(pool, page)) {
-                        /* Cache full, fallback to free pages */
-                        page_pool_return_page(pool, page);
-                }
-                return;
+                /* Page found as candidate for recycling */
+                return page;
         }
         /* Fallback/non-XDP mode: API user have elevated refcnt.
          *
@@ -407,9 +420,98 @@ void page_pool_put_page(struct page_pool
         /* Do not replace this with page_pool_return_page() */
         page_pool_release_page(pool, page);
         put_page(page);
+
+        return NULL;
+}
+
+void page_pool_put_page(struct page_pool *pool, struct page *page,
+                        unsigned int dma_sync_size, bool allow_direct)
+{
+        page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
+        if (page && !page_pool_recycle_in_ring(pool, page))
+                /* Cache full, fallback to free pages */
+                page_pool_return_page(pool, page);
 }
 EXPORT_SYMBOL(page_pool_put_page);
 
+static struct page *page_pool_drain_frag(struct page_pool *pool,
+                                         struct page *page)
+{
+        long drain_count = BIAS_MAX - pool->frag_users;
+
+        /* Some user is still using the page frag */
+        if (likely(page_pool_atomic_sub_frag_count_return(page,
+                                                          drain_count)))
+                return NULL;
+
+        if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
+                if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+                        page_pool_dma_sync_for_device(pool, page, -1);
+
+                return page;
+        }
+
+        page_pool_return_page(pool, page);
+        return NULL;
+}
+
+static void page_pool_free_frag(struct page_pool *pool)
+{
+        long drain_count = BIAS_MAX - pool->frag_users;
+        struct page *page = pool->frag_page;
+
+        pool->frag_page = NULL;
+
+        if (!page ||
+            page_pool_atomic_sub_frag_count_return(page, drain_count))
+                return;
+
+        page_pool_return_page(pool, page);
+}
+
+struct page *page_pool_alloc_frag(struct page_pool *pool,
+                                  unsigned int *offset,
+                                  unsigned int size, gfp_t gfp)
+{
+        unsigned int max_size = PAGE_SIZE << pool->p.order;
+        struct page *page = pool->frag_page;
+
+        if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+                    size > max_size))
+                return NULL;
+
+        size = ALIGN(size, dma_get_cache_alignment());
+        *offset = pool->frag_offset;
+
+        if (page && *offset + size > max_size) {
+                page = page_pool_drain_frag(pool, page);
+                if (page)
+                        goto frag_reset;
+        }
+
+        if (!page) {
+                page = page_pool_alloc_pages(pool, gfp);
+                if (unlikely(!page)) {
+                        pool->frag_page = NULL;
+                        return NULL;
+                }
+
+                pool->frag_page = page;
+
+frag_reset:
+                pool->frag_users = 1;
+                *offset = 0;
+                pool->frag_offset = size;
+                page_pool_set_frag_count(page, BIAS_MAX);
+                return page;
+        }
+
+        pool->frag_users++;
+        pool->frag_offset = *offset + size;
+        return page;
+}
+EXPORT_SYMBOL(page_pool_alloc_frag);
+
 static void page_pool_empty_ring(struct page_pool *pool)
 {
         struct page *page;
@@ -515,6 +617,8 @@ void page_pool_destroy(struct page_pool
         if (!page_pool_put(pool))
                 return;
 
+        page_pool_free_frag(pool);
+
         if (!page_pool_release(pool))
                 return;
 
@@ -541,3 +645,32 @@ void page_pool_update_nid(struct page_po
         }
 }
 EXPORT_SYMBOL(page_pool_update_nid);
+
+bool page_pool_return_skb_page(struct page *page)
+{
+        struct page_pool *pp;
+
+        page = compound_head(page);
+
+        /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
+         * in order to preserve any existing bits, such as bit 0 for the
+         * head page of compound page and bit 1 for pfmemalloc page, so
+         * mask those bits for freeing side when doing below checking,
+         * and page_is_pfmemalloc() is checked in __page_pool_put_page()
+         * to avoid recycling the pfmemalloc page.
+         */
+        if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
+                return false;
+
+        pp = page->pp;
+
+        /* Driver set this to memory recycling info. Reset it on recycle.
+         * This will *not* work for NIC using a split-page memory model.
+         * The page will be returned to the pool here regardless of the
+         * 'flipped' fragment being in use or not.
+         */
+        page_pool_put_full_page(pp, page, false);
+
+        return true;
+}
+EXPORT_SYMBOL(page_pool_return_skb_page);
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -97,10 +97,25 @@ struct page {
                 };
                 struct { /* page_pool used by netstack */
                         /**
-                         * @dma_addr: might require a 64-bit value on
-                         * 32-bit architectures.
+                         * @pp_magic: magic value to avoid recycling non
+                         * page_pool allocated pages.
                          */
-                        unsigned long dma_addr[2];
+                        unsigned long pp_magic;
+                        struct page_pool *pp;
+                        unsigned long _pp_mapping_pad;
+                        unsigned long dma_addr;
+                        union {
+                                /**
+                                 * dma_addr_upper: might require a 64-bit
+                                 * value on 32-bit architectures.
+                                 */
+                                unsigned long dma_addr_upper;
+                                /**
+                                 * For frag page support, not supported in
+                                 * 32-bit architectures with 64-bit DMA.
+                                 */
+                                atomic_long_t pp_frag_count;
+                        };
                 };
                 struct { /* slab, slob and slub */
                         union {
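
One consequence of the dma_addr_upper/pp_frag_count union added above: on 32-bit kernels with a 64-bit dma_addr_t the two fields occupy the same word, which is why page_pool_init() in this patch rejects PP_FLAG_PAGE_FRAG in that configuration. A hedged sketch of how a driver might cope at setup time (pp_params is an assumed local, not something this patch provides):

        struct page_pool_params pp_params = { 0 };

        pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG;

        /* 32-bit build with 64-bit DMA addressing: the upper DMA word
         * uses the pp_frag_count slot, so page_pool_create() would fail
         * with -EINVAL; fall back to one buffer per page instead.
         */
        if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
                pp_params.flags &= ~PP_FLAG_PAGE_FRAG;
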
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -594,13 +594,22 @@ static void skb_clone_fraglist(struct sk
                 skb_get(list);
 }
 
+static bool skb_pp_recycle(struct sk_buff *skb, void *data)
+{
+        if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
+                return false;
+        return page_pool_return_skb_page(virt_to_page(data));
+}
+
 static void skb_free_head(struct sk_buff *skb)
 {
         unsigned char *head = skb->head;
 
-        if (skb->head_frag)
+        if (skb->head_frag) {
+                if (skb_pp_recycle(skb, head))
+                        return;
                 skb_free_frag(head);
-        else
+        } else
                 kfree(head);
 }
 
@@ -612,16 +621,27 @@ static void skb_release_data(struct sk_b
         if (skb->cloned &&
             atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
                               &shinfo->dataref))
-                return;
+                goto exit;
 
         for (i = 0; i < shinfo->nr_frags; i++)
-                __skb_frag_unref(&shinfo->frags[i]);
+                __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
 
         if (shinfo->frag_list)
                 kfree_skb_list(shinfo->frag_list);
 
         skb_zcopy_clear(skb, true);
         skb_free_head(skb);
+exit:
+        /* When we clone an SKB we copy the recycling bit. The pp_recycle
+         * bit is only set on the head though, so in order to avoid races
+         * while trying to recycle fragments on __skb_frag_unref() we need
+         * to make one SKB responsible for triggering the recycle path.
+         * So disable the recycling bit if an SKB is cloned and we have
+         * additional references to the fragmented part of the SKB.
+         * Eventually the last SKB will have the recycling bit set and its
+         * dataref set to 0, which will trigger the recycling.
+         */
+        skb->pp_recycle = 0;
 }
 
 /*
@@ -1003,6 +1023,7 @@ static struct sk_buff *__skb_clone(struc
         n->nohdr = 0;
         n->peeked = 0;
         C(pfmemalloc);
+        C(pp_recycle);
         n->destructor = NULL;
         C(tail);
         C(end);
@@ -3421,7 +3442,7 @@ int skb_shift(struct sk_buff *tgt, struc
                 fragto = &skb_shinfo(tgt)->frags[merge];
 
                 skb_frag_size_add(fragto, skb_frag_size(fragfrom));
-                __skb_frag_unref(fragfrom);
+                __skb_frag_unref(fragfrom, skb->pp_recycle);
         }
 
         /* Reposition in the original skb */
@@ -5188,6 +5209,20 @@ bool skb_try_coalesce(struct sk_buff *to
         if (skb_cloned(to))
                 return false;
 
+        /* In general, avoid mixing slab allocated and page_pool allocated
+         * pages within the same SKB. However when @to is not pp_recycle and
+         * @from is cloned, we can transition frag pages from page_pool to
+         * reference counted.
+         *
+         * On the other hand, don't allow coalescing two pp_recycle SKBs if
+         * @from is cloned, in case the SKB is using page_pool fragment
+         * references (PP_FLAG_PAGE_FRAG). Since we only take full page
+         * references for cloned SKBs at the moment that would result in
+         * inconsistent reference counts.
+         */
+        if (to->pp_recycle != (from->pp_recycle && !skb_cloned(from)))
+                return false;
+
         if (len <= skb_tailroom(to)) {
                 if (len)
                         BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -37,6 +37,7 @@
 #include <linux/in6.h>
 #include <linux/if_packet.h>
 #include <net/flow.h>
+#include <net/page_pool.h>
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <linux/netfilter/nf_conntrack_common.h>
 #endif
@@ -786,7 +787,8 @@ struct sk_buff {
                                 fclone:2,
                                 peeked:1,
                                 head_frag:1,
-                                pfmemalloc:1;
+                                pfmemalloc:1,
+                                pp_recycle:1; /* page_pool recycle indicator */
 #ifdef CONFIG_SKB_EXTENSIONS
         __u8 active_extensions;
 #endif
@@ -3029,9 +3031,15 @@ static inline void skb_frag_ref(struct s
  *
  * Releases a reference on the paged fragment @frag.
  */
-static inline void __skb_frag_unref(skb_frag_t *frag)
+static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
 {
-        put_page(skb_frag_page(frag));
+        struct page *page = skb_frag_page(frag);
+
+#ifdef CONFIG_PAGE_POOL
+        if (recycle && page_pool_return_skb_page(page))
+                return;
+#endif
+        put_page(page);
 }
 
 /**
@@ -3043,7 +3051,7 @@ static inline void __skb_frag_unref(skb_
  */
 static inline void skb_frag_unref(struct sk_buff *skb, int f)
 {
-        __skb_frag_unref(&skb_shinfo(skb)->frags[f]);
+        __skb_frag_unref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
 }
 
 /**
@@ -4642,5 +4650,12 @@ static inline u64 skb_get_kcov_handle(st
 #endif
 }
 
+#ifdef CONFIG_PAGE_POOL
+static inline void skb_mark_for_recycle(struct sk_buff *skb)
+{
+        skb->pp_recycle = 1;
+}
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SKBUFF_H */
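
Tying the pieces together, a minimal sketch of a receive completion path (my_rx_build_skb and MY_RX_HEADROOM are assumed names, and the one-packet-per-page layout is an assumption rather than something this patch requires): marking the skb is what lets skb_free_head() and __skb_frag_unref() above hand the buffer back through page_pool_return_skb_page() instead of put_page() when the skb is eventually freed.

/* Illustrative only -- not part of the backport. */
static struct sk_buff *my_rx_build_skb(struct page_pool *pool,
                                       struct page *page, unsigned int len)
{
        struct sk_buff *skb;

        skb = build_skb(page_address(page), PAGE_SIZE);
        if (unlikely(!skb)) {
                page_pool_put_full_page(pool, page, true);
                return NULL;
        }

        skb_reserve(skb, MY_RX_HEADROOM);
        skb_put(skb, len);

        /* With pp_recycle set, freeing this skb routes the page back to
         * the pool via page_pool_return_skb_page() rather than put_page().
         */
        skb_mark_for_recycle(skb);

        return skb;
}
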
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -2501,7 +2501,7 @@ static void skb_put_frags(struct sk_buff
 
                 if (length == 0) {
                         /* don't need this page */
-                        __skb_frag_unref(frag);
+                        __skb_frag_unref(frag, false);
                         --skb_shinfo(skb)->nr_frags;
                 } else {
                         size = min(length, (unsigned) PAGE_SIZE);
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -526,7 +526,7 @@ static int mlx4_en_complete_rx_desc(stru
 fail:
         while (nr > 0) {
                 nr--;
-                __skb_frag_unref(skb_shinfo(skb)->frags + nr);
+                __skb_frag_unref(skb_shinfo(skb)->frags + nr, false);
         }
         return 0;
 }
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -131,7 +131,7 @@ static void destroy_record(struct tls_re
         int i;
 
         for (i = 0; i < record->num_frags; i++)
-                __skb_frag_unref(&record->frags[i]);
+                __skb_frag_unref(&record->frags[i], false);
         kfree(record);
 }
 
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -82,4 +82,7 @@
 /********** security/ **********/
 #define KEY_DESTROY 0xbd
 
+/********** net/core/page_pool.c **********/
+#define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA)
+
 #endif
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1602,7 +1602,7 @@ static inline bool page_is_pfmemalloc(st
          * Page index cannot be this large so this must be
          * a pfmemalloc page.
          */
-        return page->index == -1UL;
+        return (uintptr_t)page->lru.next & BIT(1);
 }
 
 /*
@@ -1611,12 +1611,12 @@ static inline bool page_is_pfmemalloc(st
  */
 static inline void set_page_pfmemalloc(struct page *page)
 {
-        page->index = -1UL;
+        page->lru.next = (void *)BIT(1);
 }
 
 static inline void clear_page_pfmemalloc(struct page *page)
 {
-        page->index = 0;
+        page->lru.next = NULL;
 }
 
 /*