ALLOC_FASTPATH, /* Allocation from cpu slab */
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
FREE_PCS, /* Free to percpu sheaf */
+ FREE_RCU_SHEAF, /* Free to rcu_free sheaf */
+ FREE_RCU_SHEAF_FAIL, /* Failed to free to a rcu_free sheaf */
FREE_FASTPATH, /* Free to cpu slab */
FREE_SLOWPATH, /* Freeing not to cpu slab */
FREE_FROZEN, /* Freeing to frozen slab */
struct rcu_head rcu_head;
struct list_head barn_list;
};
+ struct kmem_cache *cache;
unsigned int size;
void *objects[];
};
local_trylock_t lock;
struct slab_sheaf *main; /* never NULL when unlocked */
struct slab_sheaf *spare; /* empty or full, may be NULL */
+ struct slab_sheaf *rcu_free; /* for batching kfree_rcu() */
struct node_barn *barn;
};
if (unlikely(!sheaf))
return NULL;
+ sheaf->cache = s;
+
stat(s, SHEAF_ALLOC);
return sheaf;
sheaf->size = 0;
}
+/*
+ * Run the memcg, allocation-tagging and slab free hooks on every object
+ * currently stored in @sheaf.
+ *
+ * An object for which slab_free_hook() returns false is dropped from the
+ * sheaf: the last stored object is moved into its slot and sheaf->size is
+ * decremented, so the sheaf may hold fewer objects on return than on entry.
+ */
+static void __rcu_free_sheaf_prepare(struct kmem_cache *s,
+				     struct slab_sheaf *sheaf)
+{
+	bool init = slab_want_init_on_free(s);
+	void **objs = sheaf->objects;
+	unsigned int idx;
+
+	for (idx = 0; idx < sheaf->size; ) {
+		void **slot = objs + idx;
+		struct slab *slab = virt_to_slab(*slot);
+
+		memcg_slab_free_hook(s, slab, slot, 1);
+		alloc_tagging_slab_free_hook(s, slab, slot, 1);
+
+		if (unlikely(!slab_free_hook(s, *slot, init, true))) {
+			/* Fill the hole with the last object and revisit this slot. */
+			*slot = objs[--sheaf->size];
+		} else {
+			idx++;
+		}
+	}
+}
+
+/*
+ * RCU callback for a sheaf that is not returned to any barn: the free hooks
+ * are run on its objects, then the sheaf is passed to sheaf_flush_unused()
+ * and finally to free_empty_sheaf(). The owning cache is read from
+ * sheaf->cache.
+ */
+static void rcu_free_sheaf_nobarn(struct rcu_head *head)
+{
+	struct slab_sheaf *sheaf = container_of(head, struct slab_sheaf, rcu_head);
+	struct kmem_cache *s = sheaf->cache;
+
+	__rcu_free_sheaf_prepare(s, sheaf);
+	sheaf_flush_unused(s, sheaf);
+	free_empty_sheaf(s, sheaf);
+}
+
/*
* Caller needs to make sure migration is disabled in order to fully flush
* single cpu's sheaves
static void pcs_flush_all(struct kmem_cache *s)
{
struct slub_percpu_sheaves *pcs;
- struct slab_sheaf *spare;
+ struct slab_sheaf *spare, *rcu_free;
local_lock(&s->cpu_sheaves->lock);
pcs = this_cpu_ptr(s->cpu_sheaves);
spare = pcs->spare;
pcs->spare = NULL;
+ rcu_free = pcs->rcu_free;
+ pcs->rcu_free = NULL;
+
local_unlock(&s->cpu_sheaves->lock);
if (spare) {
free_empty_sheaf(s, spare);
}
+ if (rcu_free)
+ call_rcu(&rcu_free->rcu_head, rcu_free_sheaf_nobarn);
+
sheaf_flush_main(s);
}
free_empty_sheaf(s, pcs->spare);
pcs->spare = NULL;
}
+
+ if (pcs->rcu_free) {
+ call_rcu(&pcs->rcu_free->rcu_head, rcu_free_sheaf_nobarn);
+ pcs->rcu_free = NULL;
+ }
}
static void pcs_destroy(struct kmem_cache *s)
*/
WARN_ON(pcs->spare);
+ WARN_ON(pcs->rcu_free);
if (!WARN_ON(pcs->main->size)) {
free_empty_sheaf(s, pcs->main);
pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
- return (pcs->spare || pcs->main->size);
+ return (pcs->spare || pcs->rcu_free || pcs->main->size);
}
static void pcs_flush_all(struct kmem_cache *s);
return true;
}
+static void rcu_free_sheaf(struct rcu_head *head)
+{
+ struct slab_sheaf *sheaf;
+ struct node_barn *barn;
+ struct kmem_cache *s;
+ /*
+ * RCU callback for a sheaf queued by __kfree_rcu_sheaf(). After the free
+ * hooks have run, the sheaf is put into the barn of node numa_mem_id(),
+ * as a full sheaf or as an empty one, when the barn is below the
+ * respective limit; otherwise the sheaf is freed.
+ */
+ sheaf = container_of(head, struct slab_sheaf, rcu_head);
+
+ s = sheaf->cache;
+
+ /*
+ * This may remove some objects due to slab_free_hook() returning false,
+ * so that the sheaf might no longer be completely full. But it's easier
+ * to handle it as full (unless it became completely empty), as the code
+ * handles it fine. The only downside is that the sheaf will serve fewer
+ * allocations when reused. It only happens due to debugging, which is a
+ * performance hit anyway.
+ */
+ __rcu_free_sheaf_prepare(s, sheaf);
+
+ barn = get_node(s, numa_mem_id())->barn;
+
+ /* every object was removed by slab_free_hook(): treat the sheaf as empty */
+ if (unlikely(sheaf->size == 0))
+ goto empty;
+
+ /*
+ * Checking nr_full/nr_empty outside the lock avoids contention in case
+ * the barn is at the respective limit. Due to the race we might go over
+ * the limit but that should be rare and harmless.
+ */
+
+ if (data_race(barn->nr_full) < MAX_FULL_SHEAVES) {
+ stat(s, BARN_PUT);
+ barn_put_full_sheaf(barn, sheaf);
+ return;
+ }
+
+ stat(s, BARN_PUT_FAIL);
+ sheaf_flush_unused(s, sheaf);
+ /* reached after the flush above or via goto: try to keep the sheaf as an empty one */
+empty:
+ if (data_race(barn->nr_empty) < MAX_EMPTY_SHEAVES) {
+ barn_put_empty_sheaf(barn, sheaf);
+ return;
+ }
+
+ free_empty_sheaf(s, sheaf);
+}
+
+bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
+{
+ struct slub_percpu_sheaves *pcs;
+ struct slab_sheaf *rcu_sheaf;
+ /*
+ * Store @obj in this CPU's rcu_free sheaf of cache @s. Returns true when
+ * the object was stored. Returns false when the percpu lock could not be
+ * taken (initially, or again after the allocation) or when allocating an
+ * empty sheaf failed. A sheaf that reaches s->sheaf_capacity is detached
+ * and passed to call_rcu() with rcu_free_sheaf() as the callback.
+ */
+ if (!local_trylock(&s->cpu_sheaves->lock))
+ goto fail;
+
+ pcs = this_cpu_ptr(s->cpu_sheaves);
+
+ if (unlikely(!pcs->rcu_free)) {
+ /* no rcu_free sheaf installed: try the spare, then the barn, then allocate */
+ struct slab_sheaf *empty;
+ /* an empty spare sheaf is taken over directly */
+ if (pcs->spare && pcs->spare->size == 0) {
+ pcs->rcu_free = pcs->spare;
+ pcs->spare = NULL;
+ goto do_free;
+ }
+ /* otherwise ask the barn for an empty sheaf */
+ empty = barn_get_empty_sheaf(pcs->barn);
+
+ if (empty) {
+ pcs->rcu_free = empty;
+ goto do_free;
+ }
+ /*
+ * The lock is dropped around the allocation. pcs is only looked up
+ * again once the lock has been retaken, so the failed-trylock path
+ * below still uses the pcs obtained before unlocking.
+ */
+ local_unlock(&s->cpu_sheaves->lock);
+
+ empty = alloc_empty_sheaf(s, GFP_NOWAIT);
+
+ if (!empty)
+ goto fail;
+
+ if (!local_trylock(&s->cpu_sheaves->lock)) {
+ barn_put_empty_sheaf(pcs->barn, empty);
+ goto fail;
+ }
+
+ pcs = this_cpu_ptr(s->cpu_sheaves);
+ /* an rcu_free sheaf may have been installed meanwhile; then ours goes to the barn */
+ if (unlikely(pcs->rcu_free))
+ barn_put_empty_sheaf(pcs->barn, empty);
+ else
+ pcs->rcu_free = empty;
+ }
+
+do_free:
+
+ rcu_sheaf = pcs->rcu_free;
+
+ rcu_sheaf->objects[rcu_sheaf->size++] = obj;
+ /* rcu_sheaf stays non-NULL only when the sheaf reached capacity and was detached */
+ if (likely(rcu_sheaf->size < s->sheaf_capacity))
+ rcu_sheaf = NULL;
+ else
+ pcs->rcu_free = NULL;
+
+ local_unlock(&s->cpu_sheaves->lock);
+
+ if (rcu_sheaf)
+ call_rcu(&rcu_sheaf->rcu_head, rcu_free_sheaf);
+
+ stat(s, FREE_RCU_SHEAF);
+ return true;
+
+fail:
+ stat(s, FREE_RCU_SHEAF_FAIL);
+ return false;
+}
+
/*
* Bulk free objects to the percpu sheaves.
* Unlike free_to_pcs() this includes the calls to all necessary hooks
{
struct slub_percpu_sheaves *pcs;
struct slab_sheaf *main, *empty;
+ bool init = slab_want_init_on_free(s);
unsigned int batch, i = 0;
- bool init;
-
- init = slab_want_init_on_free(s);
while (i < size) {
struct slab *slab = virt_to_slab(p[i]);
struct kmem_cache_node *n;
flush_all_cpus_locked(s);
+
+ /* we might have rcu sheaves in flight */
+ if (s->cpu_sheaves)
+ rcu_barrier();
+
/* Attempt to free all objects */
for_each_kmem_cache_node(s, node, n) {
if (n->barn)
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_PCS, free_cpu_sheaf);
+STAT_ATTR(FREE_RCU_SHEAF, free_rcu_sheaf);
+STAT_ATTR(FREE_RCU_SHEAF_FAIL, free_rcu_sheaf_fail);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
&alloc_fastpath_attr.attr,
&alloc_slowpath_attr.attr,
&free_cpu_sheaf_attr.attr,
+ &free_rcu_sheaf_attr.attr,
+ &free_rcu_sheaf_fail_attr.attr,
&free_fastpath_attr.attr,
&free_slowpath_attr.attr,
&free_frozen_attr.attr,