#endif
enum stat_item {
+ ALLOC_PCA, /* Allocation from percpu array cache */
ALLOC_FASTPATH, /* Allocation from cpu slab */
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
+ FREE_PCA, /* Free to percpu array cache */
FREE_FASTPATH, /* Free to cpu slab */
FREE_SLOWPATH, /* Freeing not to cpu slab */
FREE_FROZEN, /* Freeing to frozen slab */
CPU_PARTIAL_FREE, /* Refill cpu partial on free */
CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
+ PCA_REFILL, /* Refilling empty percpu array cache */
+ PCA_FLUSH, /* Flushing full percpu array cache */
NR_SLUB_STAT_ITEMS
};
bool skip;
};
+static void flush_pca(struct kmem_cache *s);
+
/*
* Flush cpu slab.
*
sfw = container_of(w, struct slub_flush_work, work);
s = sfw->s;
+
+ if (s->cpu_array) {
+ struct slub_percpu_array *pca = this_cpu_ptr(s->cpu_array);
+
+ if (pca->size)
+ flush_pca(s);
+ }
+
c = this_cpu_ptr(s->cpu_slab);
if (c->slab)
return c->slab || slub_percpu_partial(c);
}
+static bool has_pca_used(int cpu, struct kmem_cache *s)
+{
+ struct slub_percpu_array *pca;
+
+ if (!s->cpu_array)
+ return false;
+
+ pca = per_cpu_ptr(s->cpu_array, cpu);
+
+ return (pca->size > 0);
+}
+
static DEFINE_MUTEX(flush_lock);
static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
for_each_online_cpu(cpu) {
sfw = &per_cpu(slub_flush, cpu);
- if (!has_cpu_slab(cpu, s)) {
+ if (!has_cpu_slab(cpu, s) && !has_pca_used(cpu, s)) {
sfw->skip = true;
continue;
}
memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
}
+/*
+ * Maximum number of objects allocated or freed during a refill or flush batch
+ * when the percpu array is empty or full, respectively. Translates directly to
+ * an on-stack array size.
+ */
+#define PCA_BATCH_MAX 32U
+
+static void *refill_pca(struct kmem_cache *s, unsigned int count, gfp_t gfp,
+ bool want_obj);
+
+static __fastpath_inline
+void *alloc_from_pca(struct kmem_cache *s, gfp_t gfp)
+{
+ struct slub_percpu_array *pca;
+ unsigned long flags;
+ void *object;
+
+ local_lock_irqsave(&s->cpu_array->lock, flags);
+ pca = this_cpu_ptr(s->cpu_array);
+
+ if (unlikely(pca->size == 0)) {
+ local_unlock_irqrestore(&s->cpu_array->lock, flags);
+
+ if (!gfpflags_allow_blocking(gfp))
+ return NULL;
+
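+		/* Refill up to half the capacity and also get one object. */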
+ object = refill_pca(s, pca->capacity / 2, gfp, true);
+
+ if (IS_ERR(object))
+ return NULL;
+
+ return object;
+ }
+
+ object = pca->objects[--pca->size];
+
+ local_unlock_irqrestore(&s->cpu_array->lock, flags);
+
+ stat(s, ALLOC_PCA);
+
+ return object;
+}
+
+static __fastpath_inline
+int alloc_from_pca_bulk(struct kmem_cache *s, size_t size, void **p)
+{
+ struct slub_percpu_array *pca;
+ unsigned long flags;
+
+ local_lock_irqsave(&s->cpu_array->lock, flags);
+ pca = this_cpu_ptr(s->cpu_array);
+
+ if (pca->size < size)
+ size = pca->size;
+
+ pca->size -= size;
+ memcpy(p, pca->objects + pca->size, size * sizeof(void *));
+
+ local_unlock_irqrestore(&s->cpu_array->lock, flags);
+ stat_add(s, ALLOC_PCA, size);
+
+ return size;
+}
+
/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
if (unlikely(object))
goto out;
- object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
+ if (s->cpu_array && (node == NUMA_NO_NODE))
+ object = alloc_from_pca(s, gfpflags);
+
+ if (!object)
+ object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
maybe_wipe_obj_freeptr(s, object);
init = slab_want_init_on_alloc(gfpflags, s);
discard_slab(s, slab);
}
+static void __flush_pca(struct kmem_cache *s, struct slub_percpu_array *pca,
+ unsigned int count, unsigned long flags);
+
+/*
+ * Free an object to the percpu array.
+ * The object is expected to have passed slab_free_hook() already.
+ */
+static __fastpath_inline
+void free_to_pca(struct kmem_cache *s, void *object)
+{
+ struct slub_percpu_array *pca;
+ unsigned long flags;
+
+ stat(s, FREE_PCA);
+
+ local_lock_irqsave(&s->cpu_array->lock, flags);
+ pca = this_cpu_ptr(s->cpu_array);
+
+	/*
+	 * The array has room for one extra pointer beyond capacity, but then
+	 * we must flush immediately without unlocking.
+	 */
+ pca->objects[pca->size++] = object;
+ if (unlikely(pca->size > pca->capacity))
+ __flush_pca(s, pca, pca->capacity / 2 + 1, flags);
+ else
+ local_unlock_irqrestore(&s->cpu_array->lock, flags);
+}
+
+/*
+ * Bulk free objects to the percpu array.
+ * Unlike free_to_pca(), this includes the calls to slab_free_hook(), as that
+ * allows us to iterate the array of objects just once.
+ * There is no flushing. If the percpu array becomes full, anything over the
+ * capacity has to be freed to slabs directly.
+ *
+ * Returns how many objects were freed to the array.
+ */
+static __fastpath_inline
+size_t free_to_pca_bulk(struct kmem_cache *s, size_t size, void **p)
+{
+ struct slub_percpu_array *pca;
+ unsigned long flags;
+ bool init;
+
+ local_lock_irqsave(&s->cpu_array->lock, flags);
+ pca = this_cpu_ptr(s->cpu_array);
+
+ if (pca->capacity - pca->size < size)
+ size = pca->capacity - pca->size;
+
+ init = slab_want_init_on_free(s);
+
+ for (size_t i = 0; i < size; i++) {
+ if (likely(slab_free_hook(s, p[i], init)))
+ pca->objects[pca->size++] = p[i];
+ }
+
+ local_unlock_irqrestore(&s->cpu_array->lock, flags);
+
+ stat_add(s, FREE_PCA, size);
+
+ return size;
+}
+
#ifndef CONFIG_SLUB_TINY
/*
* Fastpath with forced inlining to produce a kfree and kmem_cache_free that
{
memcg_slab_free_hook(s, slab, &object, 1);
- if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
+ if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s))))
+ return;
+
+ if (s->cpu_array)
+ free_to_pca(s, object);
+ else
do_slab_free(s, slab, object, object, 1, addr);
}
if (!size)
return;
+	/*
+	 * If the objects might need memcg_slab_free_hook(), skip the array
+	 * because the hook is not effective with single objects and benefits
+	 * from groups of objects from a single slab that the detached freelist
+	 * builds. But once we build the detached freelist, it would be wasteful
+	 * to throw it away and put the objects into the array.
+	 *
+	 * XXX: This test could be cache-specific if it were not possible to use
+	 * __GFP_ACCOUNT with caches that are not SLAB_ACCOUNT.
+	 */
+ if (s && s->cpu_array && !memcg_kmem_online()) {
+ size_t pca_freed = free_to_pca_bulk(s, size, p);
+
+ if (pca_freed == size)
+ return;
+
+ p += pca_freed;
+ size -= pca_freed;
+ }
+
do {
struct detached_freelist df;
int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
{
- int i;
struct obj_cgroup *objcg = NULL;
+ int allocated = 0;
+ int from_pca = 0;
if (!size)
return 0;
if (unlikely(!s))
return 0;
- i = __kmem_cache_alloc_bulk(s, flags, size, p);
+ if (s->cpu_array)
+ from_pca = alloc_from_pca_bulk(s, size, p);
+
+ if (from_pca < size) {
+ allocated = __kmem_cache_alloc_bulk(s, flags, size - from_pca,
+ p + from_pca);
+		/*
+		 * If we ran out of memory, don't bother freeing the objects
+		 * back to the percpu array; we have bigger problems.
+		 */
+ if (unlikely(allocated == 0 && from_pca > 0)) {
+ __kmem_cache_free_bulk(s, from_pca, p);
+ from_pca = 0;
+ }
+ }
+
+ allocated += from_pca;
/*
* memcg and kmem_cache debug support and memory initialization.
* Done outside of the IRQ disabled fastpath loop.
*/
- if (likely(i != 0)) {
+ if (likely(allocated != 0)) {
slab_post_alloc_hook(s, objcg, flags, size, p,
slab_want_init_on_alloc(flags, s), s->object_size);
} else {
memcg_slab_alloc_error_hook(s, size, objcg);
}
- return i;
+ return allocated;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
+/*
+ * Refill the per-cpu array from slabs.
+ *
+ * If want_obj is true, an object is returned in addition to filling the array,
+ * and it is returned even if the given count of objects could not be fully
+ * allocated but a partial refill was successful.
+ *
+ * If want_obj is false, ERR_PTR(-ENOMEM) is returned if the given count of
+ * objects could not be allocated, even if a partial refill was successful.
+ */
+static void *refill_pca(struct kmem_cache *s, unsigned int count, gfp_t gfp,
+ bool want_obj)
+{
+ unsigned int batch, allocated;
+ struct slub_percpu_array *pca;
+ void *objects[PCA_BATCH_MAX];
+ unsigned long flags;
+ void *obj = NULL;
+
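+	/* One extra object to return to the caller. */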
+ if (want_obj)
+ count++;
+
+next_batch:
+ batch = min(count, PCA_BATCH_MAX);
+ allocated = __kmem_cache_alloc_bulk(s, gfp, batch, &objects[0]);
+ if (!allocated) {
+ /*
+ * If we already have non-NULL obj, it means a previous batch
+ * succeeded and the caller does not care that the refill did
+ * not succeed completely.
+ */
+ return obj ? obj : ERR_PTR(-ENOMEM);
+ }
+
+ if (want_obj) {
+ obj = objects[--allocated];
+ count--;
+ }
+
+ local_lock_irqsave(&s->cpu_array->lock, flags);
+ pca = this_cpu_ptr(s->cpu_array);
+
+ batch = min(allocated, pca->capacity - pca->size);
+
+ memcpy(pca->objects + pca->size, objects, batch * sizeof(void *));
+ pca->size += batch;
+
+ local_unlock_irqrestore(&s->cpu_array->lock, flags);
+
+ stat_add(s, PCA_REFILL, batch);
+
+ /*
+ * We could have migrated to a different cpu or somebody else freed to
+ * pca while we were bulk allocating, and now we have too many objects.
+ */
+ if (batch < allocated) {
+ __kmem_cache_free_bulk(s, allocated - batch, &objects[batch]);
+ } else if (batch < count) {
+ count -= batch;
+ want_obj = false;
+ goto next_batch;
+ }
+
+ return obj;
+}
+
+/*
+ * Called with pca->lock locked and corresponding flags, returns unlocked.
+ */
+static void __flush_pca(struct kmem_cache *s, struct slub_percpu_array *pca,
+ unsigned int count, unsigned long flags)
+{
+ unsigned int batch, remaining;
+ void *objects[PCA_BATCH_MAX];
+
+next_batch:
+ batch = min(count, PCA_BATCH_MAX);
+
+ batch = min(batch, pca->size);
+
+ pca->size -= batch;
+ memcpy(objects, pca->objects + pca->size, batch * sizeof(void *));
+
+ remaining = pca->size;
+
+ local_unlock_irqrestore(&s->cpu_array->lock, flags);
+
+ __kmem_cache_free_bulk(s, batch, &objects[0]);
+
+ stat_add(s, PCA_FLUSH, batch);
+
+ if (batch < count && remaining > 0) {
+ count -= batch;
+ local_lock_irqsave(&s->cpu_array->lock, flags);
+ pca = this_cpu_ptr(s->cpu_array);
+ goto next_batch;
+ }
+}
+
+static void flush_pca(struct kmem_cache *s)
+{
+ struct slub_percpu_array *pca;
+ unsigned long flags;
+
+ local_lock_irqsave(&s->cpu_array->lock, flags);
+ pca = this_cpu_ptr(s->cpu_array);
+
+ __flush_pca(s, pca, pca->capacity, flags);
+}
+
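+/**
+ * kmem_cache_prefill_percpu_array - Pre-fill the cache's per-cpu array
+ * @s: The cache with an array created by kmem_cache_setup_percpu_array().
+ * @count: How many objects the array should contain afterwards.
+ * @gfp: The gfp flags for the refill. Must allow blocking.
+ *
+ * Fill the per-cpu array so that it contains enough objects for @count
+ * subsequent allocations, e.g. from a context that cannot block. See
+ * kmem_cache_setup_percpu_array() for the caveats.
+ *
+ * Return: 0 when the array already contains at least @count objects or the
+ * refill succeeded, -EINVAL when @gfp does not allow blocking, the cache has
+ * no per-cpu array or @count exceeds its capacity, -ENOMEM when the refill
+ * fails.
+ */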
+int kmem_cache_prefill_percpu_array(struct kmem_cache *s, unsigned int count,
+ gfp_t gfp)
+{
+ struct slub_percpu_array *pca;
+ unsigned int size;
+ void *refill;
+
+ if (!gfpflags_allow_blocking(gfp))
+ return -EINVAL;
+
+ if (!s->cpu_array)
+ return -EINVAL;
+
+ /* racy but we don't care */
+ pca = raw_cpu_ptr(s->cpu_array);
+
+ if (pca->capacity < count)
+ return -EINVAL;
+
+ size = READ_ONCE(pca->size);
+ if (size >= count)
+ return 0;
+
+	/*
+	 * If the existing size is less than the desired count, do not refill
+	 * just up to pca->size == count, as that way we could end up doing
+	 * many small refills if only a few of the prefilled objects actually
+	 * end up being allocated and we refill the same small amount on the
+	 * next prefill.
+	 *
+	 * Instead set our target prefilled size to the requested amount plus
+	 * half of the remaining capacity. This matches the refill in
+	 * alloc_from_pca(), which is also done up to half the capacity. For
+	 * example, with capacity 64, count 16 and size 4, the target becomes
+	 * 16 + (64 - 16) / 2 = 40, so 36 objects are requested below.
+	 */
+ count += (pca->capacity - count) / 2;
+ count -= size;
+
+ refill = refill_pca(s, count, gfp, false);
+ if (IS_ERR(refill))
+ return PTR_ERR(refill);
+
+ return 0;
+}
/*
* Object placement in a slab is made very easy because we always start at
void __kmem_cache_release(struct kmem_cache *s)
{
cache_random_seq_destroy(s);
+ if (s->cpu_array)
+ free_percpu(s->cpu_array);
#ifndef CONFIG_SLUB_TINY
free_percpu(s->cpu_slab);
#endif
return 0;
}
+/**
+ * kmem_cache_setup_percpu_array - Create a per-cpu array cache for the cache
+ * @s: The cache to add the array to. Must be created with SLAB_NO_MERGE.
+ * @count: Size of the per-cpu array.
+ *
+ * After this call, allocations from the cache go through a percpu array. When
+ * it becomes empty and the gfp flags allow blocking, half of it is refilled
+ * with a bulk allocation. When it becomes full, half of it is flushed with a
+ * bulk free operation.
+ *
+ * The array cache does not distinguish NUMA nodes, so allocations via
+ * kmem_cache_alloc_node() with a node specified other than NUMA_NO_NODE will
+ * bypass the cache.
+ *
+ * Bulk allocation and free operations also try to use the array.
+ *
+ * kmem_cache_prefill_percpu_array() can be used to pre-fill the array cache
+ * before e.g. entering a restricted context. It is however not guaranteed that
+ * the caller will be able to subsequently consume the prefilled objects. Such
+ * failures should however be sufficiently rare that, after the prefill,
+ * allocations using GFP_ATOMIC | __GFP_NOFAIL are acceptable for up to the
+ * prefilled number of objects.
+ *
+ * Limitations: when slub_debug is enabled for the cache, all relevant actions
+ * (e.g. poisoning, obtaining stacktraces) and checks happen when objects move
+ * between the array cache and slab pages, which may result in e.g. not
+ * detecting a use-after-free while the object is in the array cache, and in
+ * stacktraces that are less useful.
+ *
+ * Return: 0 if OK, -EINVAL on caches without SLAB_NO_MERGE or with the array
+ * already created, -ENOMEM when the per-cpu array creation fails.
+ */
+int kmem_cache_setup_percpu_array(struct kmem_cache *s, unsigned int count)
+{
+ int cpu;
+
+ if (WARN_ON_ONCE(!(s->flags & SLAB_NO_MERGE)))
+ return -EINVAL;
+
+ if (s->cpu_array)
+ return -EINVAL;
+
+	/*
+	 * The object array size is the desired count + 1, so we can fit the
+	 * object that triggers the need for flushing.
+	 */
+ s->cpu_array = __alloc_percpu(struct_size(s->cpu_array, objects,
+ count + 1), sizeof(void *));
+
+ if (!s->cpu_array)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ struct slub_percpu_array *pca = per_cpu_ptr(s->cpu_array, cpu);
+
+ local_lock_init(&pca->lock);
+ pca->capacity = count;
+ pca->size = 0;
+ }
+
+ return 0;
+}
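+
+/*
+ * Usage sketch (illustrative only; the "foo" cache, its object type and the
+ * sizes below are hypothetical, not part of this file):
+ *
+ *	struct kmem_cache *foo_cache;
+ *	struct foo *obj;
+ *	int err;
+ *
+ *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
+ *				      SLAB_NO_MERGE, NULL);
+ *	if (!foo_cache)
+ *		return -ENOMEM;
+ *
+ *	err = kmem_cache_setup_percpu_array(foo_cache, 64);
+ *	if (err)
+ *		return err;
+ *
+ *	// before entering a context that cannot block
+ *	err = kmem_cache_prefill_percpu_array(foo_cache, 16, GFP_KERNEL);
+ *	if (err)
+ *		return err;
+ *
+ *	// in that context, for up to the prefilled number of objects
+ *	obj = kmem_cache_alloc(foo_cache, GFP_ATOMIC | __GFP_NOFAIL);
+ */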
+
#ifdef SLAB_SUPPORTS_SYSFS
static int count_inuse(struct slab *slab)
{
} \
SLAB_ATTR(text); \
+STAT_ATTR(ALLOC_PCA, alloc_cpu_cache);
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
+STAT_ATTR(FREE_PCA, free_cpu_cache);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
+STAT_ATTR(PCA_REFILL, cpu_cache_refill);
+STAT_ATTR(PCA_FLUSH, cpu_cache_flush);
#endif /* CONFIG_SLUB_STATS */
#ifdef CONFIG_KFENCE
&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
+ &alloc_cpu_cache_attr.attr,
&alloc_fastpath_attr.attr,
&alloc_slowpath_attr.attr,
+ &free_cpu_cache_attr.attr,
&free_fastpath_attr.attr,
&free_slowpath_attr.attr,
&free_frozen_attr.attr,
&cpu_partial_free_attr.attr,
&cpu_partial_node_attr.attr,
&cpu_partial_drain_attr.attr,
+ &cpu_cache_refill_attr.attr,
+ &cpu_cache_flush_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
&failslab_attr.attr,