page_pool: Fix PP_MAGIC_MASK to avoid crashing on some 32-bit arches
author Toke Høiland-Jørgensen <toke@redhat.com>
Tue, 30 Sep 2025 11:43:29 +0000 (13:43 +0200)
committer Jakub Kicinski <kuba@kernel.org>
Mon, 6 Oct 2025 19:14:04 +0000 (12:14 -0700)
Helge reported that the introduction of PP_MAGIC_MASK led to crashes on
boot on his 32-bit parisc machine. The cause is that the mask is set too
wide, so page_pool_page_is_pp() incurs false positives which crash the
machine.

Just disabling the check in page_pool_page_is_pp() would lead to the
page_pool code itself malfunctioning; so instead, this patch changes the
define for PP_DMA_INDEX_BITS to avoid mistaking arbitrary kernel pointers
for page_pool-tagged pages.

The fix relies on the kernel pointers that alias with the pp_magic field
always being above PAGE_OFFSET. With this assumption, we can use the lowest
set bit of PAGE_OFFSET as the upper bound of PP_DMA_INDEX_MASK, which should
avoid the false positives.
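
As a worked example (not part of the patch), the sketch below mirrors the
new macro arithmetic in plain userspace C for one hypothetical 32-bit
configuration, assuming PP_SIGNATURE is 0x40 (i.e. POISON_POINTER_DELTA is
0) and PAGE_OFFSET is 0xC0000000 as on a common 3G/1G split; the FLS()/FFS()
macros stand in for the kernel's __fls()/__ffs():

  #include <stdio.h>

  #define PAGE_OFFSET  0xC0000000UL   /* hypothetical 3G/1G user/kernel split */
  #define PP_SIGNATURE 0x40UL         /* assumes POISON_POINTER_DELTA == 0 */

  /* stand-ins for the kernel's __fls()/__ffs(): highest/lowest set bit index */
  #define FLS(x) ((int)(8 * sizeof(unsigned long)) - 1 - __builtin_clzl(x))
  #define FFS(x) __builtin_ctzl(x)

  int main(void)
  {
          int shift = 1 + FLS(PP_SIGNATURE);     /* PP_DMA_INDEX_SHIFT: 1 + 6 = 7 */
          int bits  = FFS(PAGE_OFFSET) - shift;  /* PP_DMA_INDEX_BITS: 30 - 7 = 23 */
          unsigned long mask = ((1UL << bits) - 1) << shift;   /* bits 7..29 */

          /* Any kernel pointer is >= PAGE_OFFSET and therefore has a bit set
           * at or above bit 30, outside the mask below, which is why it can
           * no longer be mistaken for PP_SIGNATURE plus a stored dma index.
           */
          printf("shift=%d bits=%d mask=0x%lx\n", shift, bits, mask);
          return 0;
  }

The real define additionally caps the result at 32 and falls back to 0 under
the guard described below.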

Because we cannot rely on PAGE_OFFSET always being a compile-time
constant, nor on it always being >0, we fall back to disabling the
dma_index storage when there are not enough bits available. This leaves
us in the situation we were in before the patch in the Fixes tag, but
only on a subset of architecture configurations. This seems to be the
best we can do until the transition to page types is complete for
page_pool pages.

v2:
- Make sure there's at least 8 bits available and that the PAGE_OFFSET
  bit calculation doesn't wrap
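
To make that v2 guard concrete, here is a second sketch (again not kernel
code, same assumptions as above with PP_DMA_INDEX_SHIFT == 7 and made-up
PAGE_OFFSET values); the function mirrors only the value checks, while the
real macro also requires PAGE_OFFSET to be a compile-time constant via
__builtin_constant_p() and caps the result at 32:

  #include <stdio.h>

  #define PP_DMA_INDEX_SHIFT      7   /* assumed, as in the sketch above */
  #define PP_DMA_INDEX_MIN_OFFSET (1UL << (PP_DMA_INDEX_SHIFT + 8))  /* 0x8000 */

  /* Number of dma_index bits for a given PAGE_OFFSET, or 0 to disable tracking. */
  static unsigned int dma_index_bits(unsigned long page_offset)
  {
          if (page_offset < PP_DMA_INDEX_MIN_OFFSET ||
              (page_offset & (PP_DMA_INDEX_MIN_OFFSET - 1)))
                  return 0;   /* fewer than 8 usable bits above PP_SIGNATURE */
          return __builtin_ctzl(page_offset) - PP_DMA_INDEX_SHIFT;
  }

  int main(void)
  {
          printf("%u\n", dma_index_bits(0xC0000000UL)); /* 23: tracking enabled */
          printf("%u\n", dma_index_bits(0x00001000UL)); /* 0: PAGE_OFFSET too small */
          printf("%u\n", dma_index_bits(0x00009000UL)); /* 0: not a 0x8000 multiple */
          return 0;
  }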

Link: https://lore.kernel.org/all/aMNJMFa5fDalFmtn@p100/
Fixes: ee62ce7a1d90 ("page_pool: Track DMA-mapped pages and unmap them when destroying the pool")
Cc: stable@vger.kernel.org # 6.15+
Tested-by: Helge Deller <deller@gmx.de>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Reviewed-by: Mina Almasry <almasrymina@google.com>
Tested-by: Helge Deller <deller@gmx.de>
Link: https://patch.msgid.link/20250930114331.675412-1-toke@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/linux/mm.h
net/core/page_pool.c

index 1ae97a0b8ec756e8b327cf6f848c6a5d9f36c341..0905eb6b55ec9fdc5124a509468430522d4a2102 100644 (file)
@@ -4159,14 +4159,13 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
  * since this value becomes part of PP_SIGNATURE; meaning we can just use the
  * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the
  * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is
- * 0, we make sure that we leave the two topmost bits empty, as that guarantees
- * we won't mistake a valid kernel pointer for a value we set, regardless of the
- * VMSPLIT setting.
+ * 0, we use the lowest bit of PAGE_OFFSET as the boundary if that value is
+ * known at compile-time.
  *
- * Altogether, this means that the number of bits available is constrained by
- * the size of an unsigned long (at the upper end, subtracting two bits per the
- * above), and the definition of PP_SIGNATURE (with or without
- * POISON_POINTER_DELTA).
+ * If the value of PAGE_OFFSET is not known at compile time, or if it is too
+ * small to leave at least 8 bits available above PP_SIGNATURE, we define the
+ * number of bits to be 0, which turns off the DMA index tracking altogether
+ * (see page_pool_register_dma_index()).
  */
 #define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA))
 #if POISON_POINTER_DELTA > 0
@@ -4175,8 +4174,13 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
  */
 #define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT)
 #else
-/* Always leave out the topmost two; see above. */
-#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2)
+/* Use the lowest bit of PAGE_OFFSET if there's at least 8 bits available; see above */
+#define PP_DMA_INDEX_MIN_OFFSET (1 << (PP_DMA_INDEX_SHIFT + 8))
+#define PP_DMA_INDEX_BITS ((__builtin_constant_p(PAGE_OFFSET) && \
+                           PAGE_OFFSET >= PP_DMA_INDEX_MIN_OFFSET && \
+                           !(PAGE_OFFSET & (PP_DMA_INDEX_MIN_OFFSET - 1))) ? \
+                             MIN(32, __ffs(PAGE_OFFSET) - PP_DMA_INDEX_SHIFT) : 0)
+
 #endif
 
 #define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
index 492728f9e021d3564a94d4222293c13fe2195448..1a5edec485f141b2c13599dc84a60af20b809f78 100644 (file)
@@ -468,11 +468,60 @@ page_pool_dma_sync_for_device(const struct page_pool *pool,
        }
 }
 
+static int page_pool_register_dma_index(struct page_pool *pool,
+                                       netmem_ref netmem, gfp_t gfp)
+{
+       int err = 0;
+       u32 id;
+
+       if (unlikely(!PP_DMA_INDEX_BITS))
+               goto out;
+
+       if (in_softirq())
+               err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem),
+                              PP_DMA_INDEX_LIMIT, gfp);
+       else
+               err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem),
+                                 PP_DMA_INDEX_LIMIT, gfp);
+       if (err) {
+               WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@");
+               goto out;
+       }
+
+       netmem_set_dma_index(netmem, id);
+out:
+       return err;
+}
+
+static int page_pool_release_dma_index(struct page_pool *pool,
+                                      netmem_ref netmem)
+{
+       struct page *old, *page = netmem_to_page(netmem);
+       unsigned long id;
+
+       if (unlikely(!PP_DMA_INDEX_BITS))
+               return 0;
+
+       id = netmem_get_dma_index(netmem);
+       if (!id)
+               return -1;
+
+       if (in_softirq())
+               old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0);
+       else
+               old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0);
+       if (old != page)
+               return -1;
+
+       netmem_set_dma_index(netmem, 0);
+
+       return 0;
+}
+
 static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp)
 {
        dma_addr_t dma;
        int err;
-       u32 id;
 
        /* Setup DMA mapping: use 'struct page' area for storing DMA-addr
         * since dma_addr_t can be either 32 or 64 bits and does not always fit
@@ -491,18 +540,10 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t g
                goto unmap_failed;
        }
 
-       if (in_softirq())
-               err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem),
-                              PP_DMA_INDEX_LIMIT, gfp);
-       else
-               err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem),
-                                 PP_DMA_INDEX_LIMIT, gfp);
-       if (err) {
-               WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@");
+       err = page_pool_register_dma_index(pool, netmem, gfp);
+       if (err)
                goto unset_failed;
-       }
 
-       netmem_set_dma_index(netmem, id);
        page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len);
 
        return true;
@@ -680,8 +721,6 @@ void page_pool_clear_pp_info(netmem_ref netmem)
 static __always_inline void __page_pool_release_netmem_dma(struct page_pool *pool,
                                                           netmem_ref netmem)
 {
-       struct page *old, *page = netmem_to_page(netmem);
-       unsigned long id;
        dma_addr_t dma;
 
        if (!pool->dma_map)
@@ -690,15 +729,7 @@ static __always_inline void __page_pool_release_netmem_dma(struct page_pool *poo
                 */
                return;
 
-       id = netmem_get_dma_index(netmem);
-       if (!id)
-               return;
-
-       if (in_softirq())
-               old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0);
-       else
-               old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0);
-       if (old != page)
+       if (page_pool_release_dma_index(pool, netmem))
                return;
 
        dma = page_pool_get_dma_addr_netmem(netmem);
@@ -708,7 +739,6 @@ static __always_inline void __page_pool_release_netmem_dma(struct page_pool *poo
                             PAGE_SIZE << pool->p.order, pool->p.dma_dir,
                             DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
        page_pool_set_dma_addr_netmem(netmem, 0);
-       netmem_set_dma_index(netmem, 0);
 }
 
 /* Disconnects a page (from a page_pool).  API users can have a need