#define USE_LOCKLESS_FAST_PATH() (false)
#endif
+/* copy/pasted from mm/page_alloc.c */
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
+/*
+ * On SMP, spin_trylock is sufficient protection.
+ * On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
+ */
+#define pcp_trylock_prepare(flags) do { } while (0)
+#define pcp_trylock_finish(flag) do { } while (0)
+#else
+
+/* UP spin_trylock always succeeds so disable IRQs to prevent re-entrancy. */
+#define pcp_trylock_prepare(flags) local_irq_save(flags)
+#define pcp_trylock_finish(flags) local_irq_restore(flags)
+#endif
+
+/*
+ * Locking a pcp requires a PCP lookup followed by a spinlock. To avoid
+ * a migration causing the wrong PCP to be locked and remote memory being
+ * potentially allocated, pin the task to the CPU for the lookup+lock.
+ * preempt_disable is used on !RT because it is faster than migrate_disable.
+ * migrate_disable is used on RT because otherwise RT spinlock usage is
+ * interfered with and a high priority task cannot preempt the allocator.
+ */
+#ifndef CONFIG_PREEMPT_RT
+#define pcpu_task_pin() preempt_disable()
+#define pcpu_task_unpin() preempt_enable()
+#else
+#define pcpu_task_pin() migrate_disable()
+#define pcpu_task_unpin() migrate_enable()
+#endif
+
+/*
+ * Generic helper to look up and lock a per-cpu variable with an embedded
+ * spinlock. The return value must be passed to the matching unlock helper.
+ */
+#define pcpu_spin_lock(type, member, ptr) \
+({ \
+ type *_ret; \
+ pcpu_task_pin(); \
+ _ret = this_cpu_ptr(ptr); \
+ spin_lock(&_ret->member); \
+ _ret; \
+})
+
+#define pcpu_spin_trylock(type, member, ptr) \
+({ \
+ type *_ret; \
+ pcpu_task_pin(); \
+ _ret = this_cpu_ptr(ptr); \
+ if (!spin_trylock(&_ret->member)) { \
+ pcpu_task_unpin(); \
+ _ret = NULL; \
+ } \
+ _ret; \
+})
+
+#define pcpu_spin_unlock(member, ptr) \
+({ \
+ spin_unlock(&ptr->member); \
+ pcpu_task_unpin(); \
+})
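+
+/*
+ * A sketch of how the helpers above are intended to be paired with the UP
+ * trylock guards; "struct foo" (with a spinlock_t member named "lock") and
+ * the per-cpu pointer "foo_pcp" are made-up names for illustration only:
+ *
+ *	unsigned long __maybe_unused UP_flags;
+ *	struct foo *f;
+ *
+ *	pcp_trylock_prepare(UP_flags);
+ *	f = pcpu_spin_trylock(struct foo, lock, foo_pcp);
+ *	if (f) {
+ *		... use the per-cpu data ...
+ *		pcpu_spin_unlock(lock, f);
+ *	}
+ *	pcp_trylock_finish(UP_flags);
+ */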
+
+/* struct slub_percpu_array specific helpers. */
+#define pca_spin_lock(ptr) \
+ pcpu_spin_lock(struct slub_percpu_array, lock, ptr)
+
+#define pca_spin_trylock(ptr) \
+ pcpu_spin_trylock(struct slub_percpu_array, lock, ptr)
+
+#define pca_spin_unlock(ptr) \
+ pcpu_spin_unlock(lock, ptr)
+
#ifndef CONFIG_SLUB_TINY
#define __fastpath_inline __always_inline
#else
0, sizeof(void *));
}
+static bool refill_pca(struct kmem_cache *s, unsigned int count, gfp_t gfp);
+
+static __fastpath_inline
+void *alloc_from_pca(struct kmem_cache *s, gfp_t gfp)
+{
+ unsigned long __maybe_unused UP_flags;
+ struct slub_percpu_array *pca;
+ void *object;
+
+retry:
+ pcp_trylock_prepare(UP_flags);
+ pca = pca_spin_trylock(s->cpu_array);
+
+ if (unlikely(!pca)) {
+ pcp_trylock_finish(UP_flags);
+ return NULL;
+ }
+
+ if (unlikely(pca->used == 0)) {
+ unsigned int batch = pca->count / 2;
+
+ pca_spin_unlock(pca);
+ pcp_trylock_finish(UP_flags);
+
+ if (!gfpflags_allow_blocking(gfp) || in_irq())
+ return NULL;
+
+ if (refill_pca(s, batch, gfp))
+ goto retry;
+
+ return NULL;
+ }
+
+ object = pca->objects[--pca->used];
+
+ pca_spin_unlock(pca);
+ pcp_trylock_finish(UP_flags);
+
+ stat(s, ALLOC_PCA);
+
+ return object;
+}
+
+static __fastpath_inline
+int alloc_from_pca_bulk(struct kmem_cache *s, size_t size, void **p)
+{
+ unsigned long __maybe_unused UP_flags;
+ struct slub_percpu_array *pca;
+
+ pcp_trylock_prepare(UP_flags);
+ pca = pca_spin_trylock(s->cpu_array);
+
+ if (unlikely(!pca)) {
+ size = 0;
+ goto failed;
+ }
+
+ if (pca->used < size)
+ size = pca->used;
+
+	for (int i = size; i > 0;)
+		p[--i] = pca->objects[--pca->used];
+
+ pca_spin_unlock(pca);
+ stat_add(s, ALLOC_PCA, size);
+
+failed:
+ pcp_trylock_finish(UP_flags);
+ return size;
+}
+
/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
if (unlikely(object))
goto out;
- object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
+ if (s->cpu_array)
+ object = alloc_from_pca(s, gfpflags);
+
+ if (!object)
+ object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
maybe_wipe_obj_freeptr(s, object);
init = slab_want_init_on_alloc(gfpflags, s);
discard_slab(s, slab);
}
+static bool flush_pca(struct kmem_cache *s, unsigned int count);
+
+static __fastpath_inline
+bool free_to_pca(struct kmem_cache *s, void *object)
+{
+ unsigned long __maybe_unused UP_flags;
+ struct slub_percpu_array *pca;
+
+retry:
+ pcp_trylock_prepare(UP_flags);
+ pca = pca_spin_trylock(s->cpu_array);
+
+ if (!pca) {
+ pcp_trylock_finish(UP_flags);
+ return false;
+ }
+
+ if (pca->used == pca->count) {
+ unsigned int batch = pca->count / 2;
+
+ pca_spin_unlock(pca);
+ pcp_trylock_finish(UP_flags);
+
+ if (in_irq())
+ return false;
+
+ if (!flush_pca(s, batch))
+ return false;
+
+ goto retry;
+ }
+
+ pca->objects[pca->used++] = object;
+
+ pca_spin_unlock(pca);
+ pcp_trylock_finish(UP_flags);
+
+ stat(s, FREE_PCA);
+
+ return true;
+}
+
+static __fastpath_inline
+size_t free_to_pca_bulk(struct kmem_cache *s, size_t size, void **p)
+{
+ unsigned long __maybe_unused UP_flags;
+ struct slub_percpu_array *pca;
+ bool init;
+
+ pcp_trylock_prepare(UP_flags);
+ pca = pca_spin_trylock(s->cpu_array);
+
+ if (unlikely(!pca)) {
+ size = 0;
+ goto failed;
+ }
+
+ if (pca->count - pca->used < size)
+ size = pca->count - pca->used;
+
+ init = slab_want_init_on_free(s);
+
+ for (size_t i = 0; i < size; i++) {
+ if (likely(slab_free_hook(s, p[i], init)))
+ pca->objects[pca->used++] = p[i];
+ }
+
+ pca_spin_unlock(pca);
+ stat_add(s, FREE_PCA, size);
+
+failed:
+ pcp_trylock_finish(UP_flags);
+ return size;
+}
+
#ifndef CONFIG_SLUB_TINY
/*
* Fastpath with forced inlining to produce a kfree and kmem_cache_free that
{
memcg_slab_free_hook(s, slab, &object, 1);
-	if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
-		do_slab_free(s, slab, object, object, 1, addr);
+	if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s))))
+		return;
+
+	/* If the percpu array cannot take the object, fall back to regular free. */
+	if (s->cpu_array && free_to_pca(s, object))
+		return;
+
+	do_slab_free(s, slab, object, object, 1, addr);
}
if (!size)
return;
+ /*
+	 * In case the objects might need memcg_slab_free_hook(), skip the array,
+	 * because the hook is not efficient for single objects and benefits
+	 * from groups of objects from a single slab that the detached freelist
+	 * builds. And once we have built the detached freelist, it would be
+	 * wasteful to throw it away and put the objects into the array instead.
+	 *
+	 * XXX: This test could be cache-specific if it were not possible to use
+	 * __GFP_ACCOUNT with caches that are not SLAB_ACCOUNT.
+ */
+ if (s && s->cpu_array && !memcg_kmem_online()) {
+ size_t pca_freed = free_to_pca_bulk(s, size, p);
+
+ if (pca_freed == size)
+ return;
+
+ p += pca_freed;
+ size -= pca_freed;
+ }
+
do {
struct detached_freelist df;
int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
{
- int i;
+ int from_pca = 0;
+ int allocated = 0;
struct obj_cgroup *objcg = NULL;
if (!size)
if (unlikely(!s))
return 0;
- i = __kmem_cache_alloc_bulk(s, flags, size, p);
+ if (s->cpu_array)
+ from_pca = alloc_from_pca_bulk(s, size, p);
+
+	if (from_pca < size) {
+		allocated = __kmem_cache_alloc_bulk(s, flags, size - from_pca,
+						    p + from_pca);
+		if (allocated == 0 && from_pca > 0) {
+			__kmem_cache_free_bulk(s, from_pca, p);
+			from_pca = 0;
+		}
+	}
+
+ allocated += from_pca;
/*
* memcg and kmem_cache debug support and memory initialization.
* Done outside of the IRQ disabled fastpath loop.
*/
- if (i != 0)
+ if (allocated != 0)
slab_post_alloc_hook(s, objcg, flags, size, p,
slab_want_init_on_alloc(flags, s), s->object_size);
- return i;
+ return allocated;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
+static bool refill_pca(struct kmem_cache *s, unsigned int count, gfp_t gfp)
+{
+ void *objects[32];
+ unsigned int batch, allocated;
+ unsigned long __maybe_unused UP_flags;
+ struct slub_percpu_array *pca;
+
+bulk_alloc:
+ batch = min(count, 32U);
+
+ allocated = __kmem_cache_alloc_bulk(s, gfp, batch, &objects[0]);
+ if (!allocated)
+ return false;
+
+ pcp_trylock_prepare(UP_flags);
+ pca = pca_spin_trylock(s->cpu_array);
+	if (!pca) {
+		pcp_trylock_finish(UP_flags);
+		/* Don't leak what we just bulk allocated. */
+		__kmem_cache_free_bulk(s, allocated, &objects[0]);
+		return false;
+	}
+
+ batch = min(allocated, pca->count - pca->used);
+
+	for (unsigned int i = 0; i < batch; i++)
+		pca->objects[pca->used++] = objects[i];
+
+ pca_spin_unlock(pca);
+ pcp_trylock_finish(UP_flags);
+
+ stat_add(s, PCA_REFILL, batch);
+
+ /*
+	 * We could have migrated to a different CPU, or somebody else could have
+	 * freed to the pca while we were bulk allocating, so we may now have too
+	 * many objects.
+ */
+ if (batch < allocated) {
+ __kmem_cache_free_bulk(s, allocated - batch, &objects[batch]);
+ } else {
+ count -= batch;
+ if (count > 0)
+ goto bulk_alloc;
+ }
+
+ return true;
+}
+
+static bool flush_pca(struct kmem_cache *s, unsigned int count)
+{
+ void *objects[32];
+ unsigned int batch, remaining;
+ unsigned long __maybe_unused UP_flags;
+ struct slub_percpu_array *pca;
+
+next_batch:
+	batch = min(count, 32U);
+
+ pcp_trylock_prepare(UP_flags);
+ pca = pca_spin_trylock(s->cpu_array);
+ if (!pca) {
+ pcp_trylock_finish(UP_flags);
+ return false;
+ }
+
+ batch = min(batch, pca->used);
+
+	for (unsigned int i = 0; i < batch; i++)
+		objects[i] = pca->objects[--pca->used];
+
+ remaining = pca->used;
+
+ pca_spin_unlock(pca);
+ pcp_trylock_finish(UP_flags);
+
+ __kmem_cache_free_bulk(s, batch, &objects[0]);
+
+ stat_add(s, PCA_FLUSH, batch);
+
+ if (batch < count && remaining > 0) {
+ count -= batch;
+ goto next_batch;
+ }
+
+ return true;
+}
+
+/* Do not call from irq handler nor with irqs disabled */
+int kmem_cache_prefill_percpu_array(struct kmem_cache *s, unsigned int count,
+ gfp_t gfp)
+{
+ struct slub_percpu_array *pca;
+ unsigned int used;
+
+ lockdep_assert_no_hardirq();
+
+ if (!s->cpu_array)
+ return -EINVAL;
+
+ /* racy but we don't care */
+ pca = raw_cpu_ptr(s->cpu_array);
+
+ used = READ_ONCE(pca->used);
+
+ if (used >= count)
+ return 0;
+
+ if (pca->count < count)
+ return -EINVAL;
+
+ count -= used;
+
+ if (!refill_pca(s, count, gfp))
+ return -ENOMEM;
+
+ return 0;
+}
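+
+/*
+ * Intended usage pattern, as a sketch only; "foo_cache", "foo_lock" and the
+ * count of 5 below are illustrative and not part of this patch:
+ *
+ *	err = kmem_cache_prefill_percpu_array(foo_cache, 5, GFP_KERNEL);
+ *	if (err)
+ *		return err;
+ *
+ *	spin_lock(&foo_lock);
+ *	obj = kmem_cache_alloc(foo_cache, GFP_ATOMIC);
+ *	...
+ *	spin_unlock(&foo_lock);
+ *
+ * The prefill has to happen outside of atomic context. The later allocations
+ * can then be served from the per-cpu array without blocking, although the
+ * array may have been depleted in the meantime (e.g. after migrating to a
+ * different CPU), so allocation failure still has to be handled.
+ */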
/*
* Object placement in a slab is made very easy because we always start at
return 0;
}
+int kmem_cache_setup_percpu_array(struct kmem_cache *s, unsigned int count)
+{
+ int cpu;
+
+ if (WARN_ON_ONCE(!(s->flags & SLAB_NO_MERGE)))
+ return -EINVAL;
+
+ s->cpu_array = __alloc_percpu(struct_size(s->cpu_array, objects, count),
+ sizeof(void *));
+
+ if (!s->cpu_array)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ struct slub_percpu_array *pca = per_cpu_ptr(s->cpu_array, cpu);
+
+ spin_lock_init(&pca->lock);
+ pca->count = count;
+ pca->used = 0;
+ }
+
+ return 0;
+}
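+
+/*
+ * Example of setting up the array for a cache (a sketch; the cache name,
+ * object type and array size below are illustrative and not part of this
+ * patch):
+ *
+ *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
+ *				      SLAB_NO_MERGE, NULL);
+ *	if (!foo_cache)
+ *		return -ENOMEM;
+ *
+ *	if (kmem_cache_setup_percpu_array(foo_cache, 32)) {
+ *		kmem_cache_destroy(foo_cache);
+ *		return -ENOMEM;
+ *	}
+ *
+ * The cache has to be created with SLAB_NO_MERGE, otherwise the setup fails
+ * with -EINVAL.
+ */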
+
#ifdef SLAB_SUPPORTS_SYSFS
static int count_inuse(struct slab *slab)
{
} \
SLAB_ATTR(text); \
+STAT_ATTR(ALLOC_PCA, alloc_cpu_cache);
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
+STAT_ATTR(FREE_PCA, free_cpu_cache);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
+STAT_ATTR(PCA_REFILL, cpu_cache_refill);
+STAT_ATTR(PCA_FLUSH, cpu_cache_flush);
#endif /* CONFIG_SLUB_STATS */
#ifdef CONFIG_KFENCE
&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
+ &alloc_cpu_cache_attr.attr,
&alloc_fastpath_attr.attr,
&alloc_slowpath_attr.attr,
+ &free_cpu_cache_attr.attr,
&free_fastpath_attr.attr,
&free_slowpath_attr.attr,
&free_frozen_attr.attr,
&cpu_partial_free_attr.attr,
&cpu_partial_node_attr.attr,
&cpu_partial_drain_attr.attr,
+ &cpu_cache_refill_attr.attr,
+ &cpu_cache_flush_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
&failslab_attr.attr,