return;
 
        mem = s;
+       mem -= s->align_off;
        kfree(mem);
 }
 
                struct nft_pipapo_scratch *scratch;
 #ifdef NFT_PIPAPO_ALIGN
                void *scratch_aligned;
+               u32 align_off;
 #endif
                scratch = kzalloc_node(struct_size(scratch, map,
                                                   bsize_max * 2) +
 
                pipapo_free_scratch(clone, i);
 
-               *per_cpu_ptr(clone->scratch, i) = scratch;
-
 #ifdef NFT_PIPAPO_ALIGN
                /* Align &scratch->map (not the struct itself): the extra
                 * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
 
                scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
                scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
-               *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
+               align_off = scratch_aligned - (void *)scratch;
+
+               scratch = scratch_aligned;
+               scratch->align_off = align_off;
 #endif
+               *per_cpu_ptr(clone->scratch, i) = scratch;
        }
 
        return 0;
        if (!new->scratch)
                goto out_scratch;
 
-#ifdef NFT_PIPAPO_ALIGN
-       new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
-       if (!new->scratch_aligned)
-               goto out_scratch;
-#endif
        for_each_possible_cpu(i)
                *per_cpu_ptr(new->scratch, i) = NULL;
 
 out_scratch_realloc:
        for_each_possible_cpu(i)
                pipapo_free_scratch(new, i);
-#ifdef NFT_PIPAPO_ALIGN
-       free_percpu(new->scratch_aligned);
-#endif
 out_scratch:
        free_percpu(new->scratch);
        kfree(new);
        for_each_possible_cpu(i)
                pipapo_free_scratch(m, i);
 
-#ifdef NFT_PIPAPO_ALIGN
-       free_percpu(m->scratch_aligned);
-#endif
        free_percpu(m->scratch);
-
        pipapo_free_fields(m);
 
        kfree(m);
        for_each_possible_cpu(i)
                *per_cpu_ptr(m->scratch, i) = NULL;
 
-#ifdef NFT_PIPAPO_ALIGN
-       m->scratch_aligned = alloc_percpu(struct nft_pipapo_scratch *);
-       if (!m->scratch_aligned) {
-               err = -ENOMEM;
-               goto out_free;
-       }
-       for_each_possible_cpu(i)
-               *per_cpu_ptr(m->scratch_aligned, i) = NULL;
-#endif
-
        rcu_head_init(&m->rcu);
 
        nft_pipapo_for_each_field(f, i, m) {
        return 0;
 
 out_free:
-#ifdef NFT_PIPAPO_ALIGN
-       free_percpu(m->scratch_aligned);
-#endif
        free_percpu(m->scratch);
 out_scratch:
        kfree(m);
 
                nft_set_pipapo_match_destroy(ctx, set, m);
 
-#ifdef NFT_PIPAPO_ALIGN
-               free_percpu(m->scratch_aligned);
-#endif
                for_each_possible_cpu(cpu)
                        pipapo_free_scratch(m, cpu);
                free_percpu(m->scratch);
                if (priv->dirty)
                        nft_set_pipapo_match_destroy(ctx, set, m);
 
-#ifdef NFT_PIPAPO_ALIGN
-               free_percpu(priv->clone->scratch_aligned);
-#endif
                for_each_possible_cpu(cpu)
                        pipapo_free_scratch(priv->clone, cpu);
                free_percpu(priv->clone->scratch);
 
 /**
  * struct nft_pipapo_scratch - percpu data used for lookup and matching
  * @map_index: Current working bitmap index, toggled between field matches
+ * @align_off: Offset to get the originally allocated address: distance in
+ *             bytes from the start of the allocation to this struct,
+ *             subtracted from the struct address before it is freed
  * @map:       store partial matching results during lookup
  */
 struct nft_pipapo_scratch {
        u8 map_index;
+       u32 align_off;
        unsigned long map[];
 };
 
  * struct nft_pipapo_match - Data used for lookup and matching
  * @field_count                Amount of fields in set
  * @scratch:           Preallocated per-CPU maps for partial matching results
- * @scratch_aligned:   Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
  * @bsize_max:         Maximum lookup table bucket size of all fields, in longs
  * @rcu                        Matching data is swapped on commits
  * @f:                 Fields, with lookup and mapping tables
  */
 struct nft_pipapo_match {
        int field_count;
-#ifdef NFT_PIPAPO_ALIGN
-       struct nft_pipapo_scratch * __percpu *scratch_aligned;
-#endif
        struct nft_pipapo_scratch * __percpu *scratch;
        size_t bsize_max;
        struct rcu_head rcu;