From: Christoph Lameter Date: Fri, 20 Aug 2010 17:37:15 +0000 (-0500) Subject: slub: Dynamically size kmalloc cache allocations X-Git-Tag: v2.6.37-rc1~141^2~24 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=51df1142816e4;p=users%2Fwilly%2Flinux.git slub: Dynamically size kmalloc cache allocations kmalloc caches are statically defined and may take up a lot of space just because the sizes of the node array has to be dimensioned for the largest node count supported. This patch makes the size of the kmem_cache structure dynamic throughout by creating a kmem_cache slab cache for the kmem_cache objects. The bootstrap occurs by allocating the initial one or two kmem_cache objects from the page allocator. C2->C3 - Fix various issues indicated by David - Make create kmalloc_cache return a kmem_cache * pointer. Acked-by: David Rientjes Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 9f63538928c0..a6c43ec6a4a5 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -139,19 +139,16 @@ struct kmem_cache { #ifdef CONFIG_ZONE_DMA #define SLUB_DMA __GFP_DMA -/* Reserve extra caches for potential DMA use */ -#define KMALLOC_CACHES (2 * SLUB_PAGE_SHIFT) #else /* Disable DMA functionality */ #define SLUB_DMA (__force gfp_t)0 -#define KMALLOC_CACHES SLUB_PAGE_SHIFT #endif /* * We keep the general caches in an array of slab caches that are used for * 2^x bytes of allocations. */ -extern struct kmem_cache kmalloc_caches[KMALLOC_CACHES]; +extern struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; /* * Sorry that the following has to be that ugly but some versions of GCC @@ -216,7 +213,7 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size) if (index == 0) return NULL; - return &kmalloc_caches[index]; + return kmalloc_caches[index]; } void *kmem_cache_alloc(struct kmem_cache *, gfp_t); diff --git a/mm/slub.c b/mm/slub.c index e8c117595367..94fee96da0d2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -168,7 +168,6 @@ static inline int kmem_cache_debug(struct kmem_cache *s) /* Internal SLUB flags */ #define __OBJECT_POISON 0x80000000UL /* Poison object */ -#define __SYSFS_ADD_DEFERRED 0x40000000UL /* Not yet visible via sysfs */ static int kmem_size = sizeof(struct kmem_cache); @@ -178,7 +177,7 @@ static struct notifier_block slab_notifier; static enum { DOWN, /* No slab functionality available */ - PARTIAL, /* kmem_cache_open() works but kmalloc does not */ + PARTIAL, /* Kmem_cache_node works */ UP, /* Everything works but does not show up in sysfs */ SYSFS /* Sysfs up */ } slab_state = DOWN; @@ -2073,6 +2072,8 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) } #ifdef CONFIG_NUMA +static struct kmem_cache *kmem_cache_node; + /* * No kmalloc_node yet so do it by hand. We know that this is the first * slab on the node for this slabcache. There are no concurrent accesses @@ -2088,9 +2089,9 @@ static void early_kmem_cache_node_alloc(int node) struct kmem_cache_node *n; unsigned long flags; - BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node)); + BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); - page = new_slab(kmalloc_caches, GFP_NOWAIT, node); + page = new_slab(kmem_cache_node, GFP_NOWAIT, node); BUG_ON(!page); if (page_to_nid(page) != node) { @@ -2102,15 +2103,15 @@ static void early_kmem_cache_node_alloc(int node) n = page->freelist; BUG_ON(!n); - page->freelist = get_freepointer(kmalloc_caches, n); + page->freelist = get_freepointer(kmem_cache_node, n); page->inuse++; - kmalloc_caches->node[node] = n; + kmem_cache_node->node[node] = n; #ifdef CONFIG_SLUB_DEBUG - init_object(kmalloc_caches, n, 1); - init_tracking(kmalloc_caches, n); + init_object(kmem_cache_node, n, 1); + init_tracking(kmem_cache_node, n); #endif - init_kmem_cache_node(n, kmalloc_caches); - inc_slabs_node(kmalloc_caches, node, page->objects); + init_kmem_cache_node(n, kmem_cache_node); + inc_slabs_node(kmem_cache_node, node, page->objects); /* * lockdep requires consistent irq usage for each lock @@ -2128,8 +2129,10 @@ static void free_kmem_cache_nodes(struct kmem_cache *s) for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = s->node[node]; + if (n) - kmem_cache_free(kmalloc_caches, n); + kmem_cache_free(kmem_cache_node, n); + s->node[node] = NULL; } } @@ -2145,7 +2148,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s) early_kmem_cache_node_alloc(node); continue; } - n = kmem_cache_alloc_node(kmalloc_caches, + n = kmem_cache_alloc_node(kmem_cache_node, GFP_KERNEL, node); if (!n) { @@ -2498,11 +2501,13 @@ EXPORT_SYMBOL(kmem_cache_destroy); * Kmalloc subsystem *******************************************************************/ -struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; +struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; EXPORT_SYMBOL(kmalloc_caches); +static struct kmem_cache *kmem_cache; + #ifdef CONFIG_ZONE_DMA -static struct kmem_cache kmalloc_dma_caches[SLUB_PAGE_SHIFT]; +static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; #endif static int __init setup_slub_min_order(char *str) @@ -2541,9 +2546,13 @@ static int __init setup_slub_nomerge(char *str) __setup("slub_nomerge", setup_slub_nomerge); -static void create_kmalloc_cache(struct kmem_cache *s, - const char *name, int size, unsigned int flags) +static struct kmem_cache *__init create_kmalloc_cache(const char *name, + int size, unsigned int flags) { + struct kmem_cache *s; + + s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); + /* * This function is called with IRQs disabled during early-boot on * single CPU so there's no need to take slub_lock here. @@ -2553,12 +2562,11 @@ static void create_kmalloc_cache(struct kmem_cache *s, goto panic; list_add(&s->list, &slab_caches); - - if (!sysfs_slab_add(s)) - return; + return s; panic: panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); + return NULL; } /* @@ -2613,10 +2621,10 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) #ifdef CONFIG_ZONE_DMA if (unlikely((flags & SLUB_DMA))) - return &kmalloc_dma_caches[index]; + return kmalloc_dma_caches[index]; #endif - return &kmalloc_caches[index]; + return kmalloc_caches[index]; } void *__kmalloc(size_t size, gfp_t flags) @@ -2940,46 +2948,113 @@ static int slab_memory_callback(struct notifier_block *self, * Basic setup of slabs *******************************************************************/ +/* + * Used for early kmem_cache structures that were allocated using + * the page allocator + */ + +static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s) +{ + int node; + + list_add(&s->list, &slab_caches); + s->refcount = -1; + + for_each_node_state(node, N_NORMAL_MEMORY) { + struct kmem_cache_node *n = get_node(s, node); + struct page *p; + + if (n) { + list_for_each_entry(p, &n->partial, lru) + p->slab = s; + +#ifdef CONFIG_SLAB_DEBUG + list_for_each_entry(p, &n->full, lru) + p->slab = s; +#endif + } + } +} + void __init kmem_cache_init(void) { int i; int caches = 0; + struct kmem_cache *temp_kmem_cache; + int order; #ifdef CONFIG_NUMA + struct kmem_cache *temp_kmem_cache_node; + unsigned long kmalloc_size; + + kmem_size = offsetof(struct kmem_cache, node) + + nr_node_ids * sizeof(struct kmem_cache_node *); + + /* Allocate two kmem_caches from the page allocator */ + kmalloc_size = ALIGN(kmem_size, cache_line_size()); + order = get_order(2 * kmalloc_size); + kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); + /* * Must first have the slab cache available for the allocations of the * struct kmem_cache_node's. There is special bootstrap code in * kmem_cache_open for slab_state == DOWN. */ - create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", - sizeof(struct kmem_cache_node), 0); - kmalloc_caches[0].refcount = -1; - caches++; + kmem_cache_node = (void *)kmem_cache + kmalloc_size; + + kmem_cache_open(kmem_cache_node, "kmem_cache_node", + sizeof(struct kmem_cache_node), + 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); +#else + /* Allocate a single kmem_cache from the page allocator */ + kmem_size = sizeof(struct kmem_cache); + order = get_order(kmem_size); + kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); #endif /* Able to allocate the per node structures */ slab_state = PARTIAL; - /* Caches that are not of the two-to-the-power-of size */ - if (KMALLOC_MIN_SIZE <= 32) { - create_kmalloc_cache(&kmalloc_caches[1], - "kmalloc-96", 96, 0); - caches++; - } - if (KMALLOC_MIN_SIZE <= 64) { - create_kmalloc_cache(&kmalloc_caches[2], - "kmalloc-192", 192, 0); - caches++; - } + temp_kmem_cache = kmem_cache; + kmem_cache_open(kmem_cache, "kmem_cache", kmem_size, + 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); + memcpy(kmem_cache, temp_kmem_cache, kmem_size); - for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { - create_kmalloc_cache(&kmalloc_caches[i], - "kmalloc", 1 << i, 0); - caches++; - } +#ifdef CONFIG_NUMA + /* + * Allocate kmem_cache_node properly from the kmem_cache slab. + * kmem_cache_node is separately allocated so no need to + * update any list pointers. + */ + temp_kmem_cache_node = kmem_cache_node; + kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); + memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size); + + kmem_cache_bootstrap_fixup(kmem_cache_node); + + caches++; +#else + /* + * kmem_cache has kmem_cache_node embedded and we moved it! + * Update the list heads + */ + INIT_LIST_HEAD(&kmem_cache->local_node.partial); + list_splice(&temp_kmem_cache->local_node.partial, &kmem_cache->local_node.partial); +#ifdef CONFIG_SLUB_DEBUG + INIT_LIST_HEAD(&kmem_cache->local_node.full); + list_splice(&temp_kmem_cache->local_node.full, &kmem_cache->local_node.full); +#endif +#endif + kmem_cache_bootstrap_fixup(kmem_cache); + caches++; + /* Free temporary boot structure */ + free_pages((unsigned long)temp_kmem_cache, order); + + /* Now we can use the kmem_cache to allocate kmalloc slabs */ /* * Patch up the size_index table if we have strange large alignment @@ -3019,6 +3094,22 @@ void __init kmem_cache_init(void) size_index[size_index_elem(i)] = 8; } + /* Caches that are not of the two-to-the-power-of size */ + if (KMALLOC_MIN_SIZE <= 32) { + kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0); + caches++; + } + + if (KMALLOC_MIN_SIZE <= 64) { + kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0); + caches++; + } + + for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { + kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0); + caches++; + } + slab_state = UP; /* Provide the correct kmalloc names now that the caches are up */ @@ -3026,30 +3117,24 @@ void __init kmem_cache_init(void) char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); BUG_ON(!s); - kmalloc_caches[i].name = s; + kmalloc_caches[i]->name = s; } #ifdef CONFIG_SMP register_cpu_notifier(&slab_notifier); #endif -#ifdef CONFIG_NUMA - kmem_size = offsetof(struct kmem_cache, node) + - nr_node_ids * sizeof(struct kmem_cache_node *); -#else - kmem_size = sizeof(struct kmem_cache); -#endif #ifdef CONFIG_ZONE_DMA - for (i = 1; i < SLUB_PAGE_SHIFT; i++) { - struct kmem_cache *s = &kmalloc_caches[i]; + for (i = 0; i < SLUB_PAGE_SHIFT; i++) { + struct kmem_cache *s = kmalloc_caches[i]; - if (s->size) { + if (s && s->size) { char *name = kasprintf(GFP_NOWAIT, "dma-kmalloc-%d", s->objsize); BUG_ON(!name); - create_kmalloc_cache(&kmalloc_dma_caches[i], - name, s->objsize, SLAB_CACHE_DMA); + kmalloc_dma_caches[i] = create_kmalloc_cache(name, + s->objsize, SLAB_CACHE_DMA); } } #endif