BARN_GET_FAIL, /* Failed to get full sheaf from barn */
BARN_PUT, /* Put full sheaf to barn */
BARN_PUT_FAIL, /* Failed to put full sheaf to barn */
+ SHEAF_PREFILL_FAST, /* Sheaf prefill grabbed the spare sheaf */
+ SHEAF_PREFILL_SLOW, /* Sheaf prefill found no spare sheaf */
+ SHEAF_PREFILL_OVERSIZE, /* Allocation of oversize sheaf for prefill */
+ SHEAF_RETURN_FAST, /* Sheaf return reattached spare sheaf */
+ SHEAF_RETURN_SLOW, /* Sheaf return could not reattach spare */
NR_SLUB_STAT_ITEMS
};
union {
struct rcu_head rcu_head;
struct list_head barn_list;
+		/* only used for prefilled sheaves */
+ unsigned int capacity;
};
struct kmem_cache *cache;
unsigned int size;
spin_unlock_irqrestore(&barn->lock, flags);
}
+static struct slab_sheaf *barn_get_full_or_empty_sheaf(struct node_barn *barn)
+{
+ struct slab_sheaf *sheaf = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&barn->lock, flags);
+
+ if (barn->nr_full) {
+ sheaf = list_first_entry(&barn->sheaves_full, struct slab_sheaf,
+ barn_list);
+ list_del(&sheaf->barn_list);
+ barn->nr_full--;
+ } else if (barn->nr_empty) {
+ sheaf = list_first_entry(&barn->sheaves_empty,
+ struct slab_sheaf, barn_list);
+ list_del(&sheaf->barn_list);
+ barn->nr_empty--;
+ }
+
+ spin_unlock_irqrestore(&barn->lock, flags);
+
+ return sheaf;
+}
+
/*
* If a full sheaf is available, return it and put the supplied empty one to
* barn. We ignore the limit on empty sheaves as the number of sheaves doesn't
}
EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
+/*
+ * returns a sheaf prefilled with at least the requested number (size) of
+ * objects; when prefilling is needed, it is done with the given gfp flags
+ *
+ * returns NULL if sheaf allocation or prefilling failed
+ */
+struct slab_sheaf *
+kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
+{
+ struct slub_percpu_sheaves *pcs;
+ struct slab_sheaf *sheaf = NULL;
+
+ if (unlikely(size > s->sheaf_capacity)) {
+
+ /*
+		 * slab_debug intentionally disables cpu sheaves, so all
+		 * prefilled sheaves become "oversize" and we trade
+		 * performance for the debugging. The same applies to
+		 * SLUB_TINY. Creating a cache without sheaves and then
+		 * requesting a prefilled sheaf is, however, not expected,
+		 * so warn.
+ */
+ WARN_ON_ONCE(s->sheaf_capacity == 0 &&
+ !IS_ENABLED(CONFIG_SLUB_TINY) &&
+ !(s->flags & SLAB_DEBUG_FLAGS));
+
+ sheaf = kzalloc(struct_size(sheaf, objects, size), gfp);
+ if (!sheaf)
+ return NULL;
+
+ stat(s, SHEAF_PREFILL_OVERSIZE);
+ sheaf->cache = s;
+ sheaf->capacity = size;
+
+ if (!__kmem_cache_alloc_bulk(s, gfp, size,
+ &sheaf->objects[0])) {
+ kfree(sheaf);
+ return NULL;
+ }
+
+ sheaf->size = size;
+
+ return sheaf;
+ }
+
+ local_lock(&s->cpu_sheaves->lock);
+ pcs = this_cpu_ptr(s->cpu_sheaves);
+
+ if (pcs->spare) {
+ sheaf = pcs->spare;
+ pcs->spare = NULL;
+ stat(s, SHEAF_PREFILL_FAST);
+ } else {
+ stat(s, SHEAF_PREFILL_SLOW);
+ sheaf = barn_get_full_or_empty_sheaf(pcs->barn);
+ if (sheaf && sheaf->size)
+ stat(s, BARN_GET);
+ else
+ stat(s, BARN_GET_FAIL);
+ }
+
+ local_unlock(&s->cpu_sheaves->lock);
+
+ if (!sheaf)
+ sheaf = alloc_empty_sheaf(s, gfp);
+
+ if (sheaf && sheaf->size < size) {
+ if (refill_sheaf(s, sheaf, gfp)) {
+ sheaf_flush_unused(s, sheaf);
+ free_empty_sheaf(s, sheaf);
+ sheaf = NULL;
+ }
+ }
+
+ if (sheaf)
+ sheaf->capacity = s->sheaf_capacity;
+
+ return sheaf;
+}
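+
+#if 0
+/*
+ * Usage sketch only (hypothetical caller; the names example_reserve, cache,
+ * max_objects and sheafp are not part of this patch): reserve a worst-case
+ * number of objects up front, while it is still safe to sleep, so that the
+ * later consumers cannot fail.
+ */
+static int example_reserve(struct kmem_cache *cache, unsigned int max_objects,
+			   struct slab_sheaf **sheafp)
+{
+	/*
+	 * May reuse the percpu spare sheaf, take a sheaf from the barn, or
+	 * allocate and fill a fresh one. A request larger than the cache's
+	 * sheaf_capacity gets a one-off "oversize" sheaf instead.
+	 */
+	*sheafp = kmem_cache_prefill_sheaf(cache, GFP_KERNEL, max_objects);
+
+	return *sheafp ? 0 : -ENOMEM;
+}
+#endif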
+
+/*
+ * Use this to return a sheaf obtained by kmem_cache_prefill_sheaf()
+ *
+ * If the sheaf cannot simply become the percpu spare sheaf, but there's space
+ * for a full sheaf in the barn, we try to refill the sheaf back to the cache's
+ * sheaf_capacity to avoid handling partially full sheaves.
+ *
+ * If the refill fails because gfp is e.g. GFP_NOWAIT, or the barn is full, the
+ * sheaf is instead flushed and freed.
+ */
+void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf *sheaf)
+{
+ struct slub_percpu_sheaves *pcs;
+ struct node_barn *barn;
+
+ if (unlikely(sheaf->capacity != s->sheaf_capacity)) {
+ sheaf_flush_unused(s, sheaf);
+ kfree(sheaf);
+ return;
+ }
+
+ local_lock(&s->cpu_sheaves->lock);
+ pcs = this_cpu_ptr(s->cpu_sheaves);
+
+ if (!pcs->spare) {
+ pcs->spare = sheaf;
+ sheaf = NULL;
+ stat(s, SHEAF_RETURN_FAST);
+ }
+
+ local_unlock(&s->cpu_sheaves->lock);
+
+ if (!sheaf)
+ return;
+
+ stat(s, SHEAF_RETURN_SLOW);
+
+ /* Accessing pcs->barn outside local_lock is safe. */
+ barn = pcs->barn;
+
+ /*
+ * If the barn has too many full sheaves or we fail to refill the sheaf,
+ * simply flush and free it.
+ */
+	if (data_race(barn->nr_full) >= MAX_FULL_SHEAVES ||
+	    refill_sheaf(s, sheaf, gfp)) {
+ sheaf_flush_unused(s, sheaf);
+ free_empty_sheaf(s, sheaf);
+ return;
+ }
+
+ barn_put_full_sheaf(barn, sheaf);
+ stat(s, BARN_PUT);
+}
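+
+#if 0
+/*
+ * Usage sketch only (hypothetical caller): the sheaf is always handed back
+ * with kmem_cache_return_sheaf(), even when only some, or none, of the
+ * prefilled objects were consumed. With a sleepable gfp the sheaf may be
+ * refilled and kept as the spare or in the barn; with e.g. GFP_NOWAIT it may
+ * simply be flushed and freed.
+ */
+static void example_done(struct kmem_cache *cache, struct slab_sheaf *sheaf)
+{
+	kmem_cache_return_sheaf(cache, GFP_KERNEL, sheaf);
+}
+#endif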
+
+/*
+ * refill a sheaf previously obtained from kmem_cache_prefill_sheaf() to at
+ * least the given size
+ *
+ * the sheaf might be replaced by a new one when requesting more than
+ * s->sheaf_capacity objects; if such a replacement is necessary but the
+ * refill fails (returning -ENOMEM), the existing sheaf is left intact
+ *
+ * In practice we always refill to the full sheaf_capacity.
+ */
+int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf **sheafp, unsigned int size)
+{
+ struct slab_sheaf *sheaf;
+
+ /*
+	 * TODO: do we want to support *sheaf == NULL to be equivalent to
+	 * kmem_cache_prefill_sheaf()?
+ */
+ if (!sheafp || !(*sheafp))
+ return -EINVAL;
+
+ sheaf = *sheafp;
+ if (sheaf->size >= size)
+ return 0;
+
+ if (likely(sheaf->capacity >= size)) {
+ if (likely(sheaf->capacity == s->sheaf_capacity))
+ return refill_sheaf(s, sheaf, gfp);
+
+ if (!__kmem_cache_alloc_bulk(s, gfp, sheaf->capacity - sheaf->size,
+ &sheaf->objects[sheaf->size])) {
+ return -ENOMEM;
+ }
+ sheaf->size = sheaf->capacity;
+
+ return 0;
+ }
+
+ /*
+ * We had a regular sized sheaf and need an oversize one, or we had an
+ * oversize one already but need a larger one now.
+ * This should be a very rare path so let's not complicate it.
+ */
+ sheaf = kmem_cache_prefill_sheaf(s, gfp, size);
+ if (!sheaf)
+ return -ENOMEM;
+
+ kmem_cache_return_sheaf(s, gfp, *sheafp);
+ *sheafp = sheaf;
+ return 0;
+}
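+
+#if 0
+/*
+ * Usage sketch only (hypothetical caller): grow an existing reservation when
+ * the initial worst-case estimate turns out to be too small. On failure the
+ * original sheaf and its objects are left intact; on success the sheaf
+ * pointer may have been replaced, so callers must continue using *sheafp.
+ */
+static int example_grow(struct kmem_cache *cache, struct slab_sheaf **sheafp,
+			unsigned int new_max)
+{
+	/* a no-op if the sheaf already holds at least new_max objects */
+	return kmem_cache_refill_sheaf(cache, GFP_KERNEL, sheafp, new_max);
+}
+#endif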
+
+/*
+ * Allocate from a sheaf obtained by kmem_cache_prefill_sheaf()
+ *
+ * Guaranteed not to fail for at least as many allocations as the requested
+ * prefill size. Once the sheaf is emptied it fails; there is no fallback to
+ * the slab cache itself.
+ *
+ * The gfp parameter is meant only to specify __GFP_ZERO or __GFP_ACCOUNT;
+ * memcg charging is forced over limit if necessary, to avoid failure.
+ */
+void *
+kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf *sheaf)
+{
+ void *ret = NULL;
+ bool init;
+
+ if (sheaf->size == 0)
+ goto out;
+
+ ret = sheaf->objects[--sheaf->size];
+
+ init = slab_want_init_on_alloc(gfp, s);
+
+ /* add __GFP_NOFAIL to force successful memcg charging */
+ slab_post_alloc_hook(s, NULL, gfp | __GFP_NOFAIL, 1, &ret, init, s->object_size);
+out:
+ trace_kmem_cache_alloc(_RET_IP_, ret, s, gfp, NUMA_NO_NODE);
+
+ return ret;
+}
+
+unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf)
+{
+ return sheaf->size;
+}
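+
+#if 0
+/*
+ * Usage sketch only (hypothetical caller, lock and buffers): consume the
+ * reservation in a context that must not fail, e.g. under a spinlock. Per
+ * the comment above, gfp is only meant to request zeroing or accounting, so
+ * plain __GFP_ZERO is passed here. Allocations keep succeeding as long as
+ * kmem_cache_sheaf_size() reports remaining objects. The *_noprof entry
+ * point defined above is called directly for brevity.
+ */
+static unsigned int example_consume(struct kmem_cache *cache,
+				    struct slab_sheaf *sheaf,
+				    spinlock_t *lock, void **objs,
+				    unsigned int count)
+{
+	unsigned int i;
+
+	spin_lock(lock);
+
+	for (i = 0; i < count && kmem_cache_sheaf_size(sheaf); i++)
+		objs[i] = kmem_cache_alloc_from_sheaf_noprof(cache, __GFP_ZERO,
+							     sheaf);
+
+	spin_unlock(lock);
+
+	return i;
+}
+#endif
+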
/*
* To avoid unnecessary overhead, we pass through large allocation requests
* directly to the page allocator. We use __GFP_COMP, because we will need to
STAT_ATTR(BARN_GET_FAIL, barn_get_fail);
STAT_ATTR(BARN_PUT, barn_put);
STAT_ATTR(BARN_PUT_FAIL, barn_put_fail);
+STAT_ATTR(SHEAF_PREFILL_FAST, sheaf_prefill_fast);
+STAT_ATTR(SHEAF_PREFILL_SLOW, sheaf_prefill_slow);
+STAT_ATTR(SHEAF_PREFILL_OVERSIZE, sheaf_prefill_oversize);
+STAT_ATTR(SHEAF_RETURN_FAST, sheaf_return_fast);
+STAT_ATTR(SHEAF_RETURN_SLOW, sheaf_return_slow);
#endif /* CONFIG_SLUB_STATS */
#ifdef CONFIG_KFENCE
&barn_get_fail_attr.attr,
&barn_put_attr.attr,
&barn_put_fail_attr.attr,
+ &sheaf_prefill_fast_attr.attr,
+ &sheaf_prefill_slow_attr.attr,
+ &sheaf_prefill_oversize_attr.attr,
+ &sheaf_return_fast_attr.attr,
+ &sheaf_return_slow_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
&failslab_attr.attr,