kmemleak: enable tracking for percpu pointers
author Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Thu, 25 Jul 2024 04:12:15 +0000 (12:12 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Mon, 2 Sep 2024 03:25:49 +0000 (20:25 -0700)
Patch series "kmemleak: support for percpu memory leak detect".

This is a rework of this series:
https://lore.kernel.org/lkml/20200921020007.35803-1-chenjun102@huawei.com/

Originally I was investigating a percpu leak on our customer nodes, and
having this functionality was a huge help, which led to this fix [1].

So it's probably a good idea to have this in mainline too, especially
since, after [2], it became much easier to implement (we already have a
separate tree for percpu pointers).

[1] commit 0af8c09c89681 ("netfilter: x_tables: fix percpu counter block leak on error path when creating new netns")
[2] commit 39042079a0c24 ("kmemleak: avoid RCU stalls when freeing metadata for per-CPU pointers")

This patch (of 2):

This basically does:

- Add min_percpu_addr and max_percpu_addr to filter out unrelated data,
  similar to the existing min_addr and max_addr;

- Set min_count for percpu pointers to 1 to start tracking them;

- Calculate the checksum of a percpu area as the XOR of the crc32 of each
  per-CPU copy;

- Split the pointer lookup and reference-update code into a separate
  helper and call it twice: once treating the scanned word as a virtual
  address and once treating it as a percpu address.
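
To illustrate the min_count change (second point above): percpu areas used
to be scanned but never reported, while now an area that nothing references
any more shows up as a leak after a scan (e.g. via
"echo scan > /sys/kernel/debug/kmemleak"). A minimal, hypothetical
reproducer; the module and symbol names below are made up and are not part
of this patch:

  /*
   * Hypothetical leak reproducer: the only pointer to the percpu area is
   * a local that goes out of scope, so with min_count now set to 1
   * kmemleak reports the alloc_percpu() block as unreferenced.
   */
  #include <linux/module.h>
  #include <linux/percpu.h>

  static int __init percpu_leak_init(void)
  {
          u64 __percpu *counters = alloc_percpu(u64);

          if (!counters)
                  return -ENOMEM;
          /* deliberately no free_percpu() and no saved copy of 'counters' */
          return 0;
  }
  module_init(percpu_leak_init);

  MODULE_LICENSE("GPL");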

[ptikhomirov@virtuozzo.com: v2]
Link: https://lkml.kernel.org/r/20240731025526.157529-2-ptikhomirov@virtuozzo.com
Link: https://lkml.kernel.org/r/20240725041223.872472-1-ptikhomirov@virtuozzo.com
Link: https://lkml.kernel.org/r/20240725041223.872472-2-ptikhomirov@virtuozzo.com
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Wei Yongjun <weiyongjun1@huawei.com>
Cc: Chen Jun <chenjun102@huawei.com>
Cc: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
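
A note on the checksum scheme in update_checksum() below: kmemleak postpones
reporting objects whose contents are still changing, and for percpu objects
the per-CPU crc32 values are folded together with XOR, so a write to any
single CPU's copy perturbs the aggregate. A small userspace sketch of that
property (plain C using zlib's crc32(), built with -lz; NR_CPUS and AREA_SZ
are arbitrary stand-ins, not values from the patch):

  /*
   * Userspace sketch, not kernel code: crc32 each per-CPU copy and XOR
   * the results, as update_checksum() does for OBJECT_PERCPU objects.
   * The 'copies' array stands in for per_cpu_ptr() views of one area.
   */
  #include <stdio.h>
  #include <string.h>
  #include <zlib.h>

  #define NR_CPUS 4
  #define AREA_SZ 16

  static unsigned long percpu_csum(unsigned char copies[NR_CPUS][AREA_SZ])
  {
          unsigned long csum = 0;

          for (int cpu = 0; cpu < NR_CPUS; cpu++)
                  csum ^= crc32(0L, copies[cpu], AREA_SZ);
          return csum;
  }

  int main(void)
  {
          unsigned char copies[NR_CPUS][AREA_SZ];
          unsigned long before, after;

          memset(copies, 0, sizeof(copies));
          before = percpu_csum(copies);

          copies[2][5] = 0xff;    /* one "CPU" touches its copy */
          after = percpu_csum(copies);

          printf("before=%08lx after=%08lx changed=%s\n",
                 before, after, before != after ? "yes" : "no");
          return 0;
  }
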
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 764b08100570b82ebf5ecd840a3a5705d5a31f6a..6b498c6d9c34a3609c3b74023b897eba68ea2fd0 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -224,6 +224,10 @@ static int kmemleak_error;
 static unsigned long min_addr = ULONG_MAX;
 static unsigned long max_addr;
 
+/* minimum and maximum address that may be valid per-CPU pointers */
+static unsigned long min_percpu_addr = ULONG_MAX;
+static unsigned long max_percpu_addr;
+
 static struct task_struct *scan_thread;
 /* used to avoid reporting of recently allocated objects */
 static unsigned long jiffies_min_age;
@@ -294,13 +298,20 @@ static void hex_dump_object(struct seq_file *seq,
        const u8 *ptr = (const u8 *)object->pointer;
        size_t len;
 
-       if (WARN_ON_ONCE(object->flags & (OBJECT_PHYS | OBJECT_PERCPU)))
+       if (WARN_ON_ONCE(object->flags & OBJECT_PHYS))
                return;
 
+       if (object->flags & OBJECT_PERCPU)
+               ptr = (const u8 *)this_cpu_ptr((void __percpu *)object->pointer);
+
        /* limit the number of lines to HEX_MAX_LINES */
        len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE);
 
-       warn_or_seq_printf(seq, "  hex dump (first %zu bytes):\n", len);
+       if (object->flags & OBJECT_PERCPU)
+               warn_or_seq_printf(seq, "  hex dump (first %zu bytes on cpu %d):\n",
+                                  len, raw_smp_processor_id());
+       else
+               warn_or_seq_printf(seq, "  hex dump (first %zu bytes):\n", len);
        kasan_disable_current();
        warn_or_seq_hex_dump(seq, DUMP_PREFIX_NONE, HEX_ROW_SIZE,
                             HEX_GROUP_SIZE, kasan_reset_tag((void *)ptr), len, HEX_ASCII);
@@ -695,10 +706,14 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,
 
        untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
        /*
-        * Only update min_addr and max_addr with object
-        * storing virtual address.
+        * Only update min_addr and max_addr with objects storing virtual
+        * addresses, and update min_percpu_addr and max_percpu_addr for
+        * per-CPU objects.
         */
-       if (!(objflags & (OBJECT_PHYS | OBJECT_PERCPU))) {
+       if (objflags & OBJECT_PERCPU) {
+               min_percpu_addr = min(min_percpu_addr, untagged_ptr);
+               max_percpu_addr = max(max_percpu_addr, untagged_ptr + size);
+       } else if (!(objflags & OBJECT_PHYS)) {
                min_addr = min(min_addr, untagged_ptr);
                max_addr = max(max_addr, untagged_ptr + size);
        }
@@ -1055,12 +1070,8 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
 {
        pr_debug("%s(0x%px, %zu)\n", __func__, ptr, size);
 
-       /*
-        * Percpu allocations are only scanned and not reported as leaks
-        * (min_count is set to 0).
-        */
        if (kmemleak_enabled && ptr && !IS_ERR(ptr))
-               create_object_percpu((unsigned long)ptr, size, 0, gfp);
+               create_object_percpu((unsigned long)ptr, size, 1, gfp);
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);
 
@@ -1304,12 +1315,23 @@ static bool update_checksum(struct kmemleak_object *object)
 {
        u32 old_csum = object->checksum;
 
-       if (WARN_ON_ONCE(object->flags & (OBJECT_PHYS | OBJECT_PERCPU)))
+       if (WARN_ON_ONCE(object->flags & OBJECT_PHYS))
                return false;
 
        kasan_disable_current();
        kcsan_disable_current();
-       object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
+       if (object->flags & OBJECT_PERCPU) {
+               unsigned int cpu;
+
+               object->checksum = 0;
+               for_each_possible_cpu(cpu) {
+                       void *ptr = per_cpu_ptr((void __percpu *)object->pointer, cpu);
+
+                       object->checksum ^= crc32(0, kasan_reset_tag((void *)ptr), object->size);
+               }
+       } else {
+               object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
+       }
        kasan_enable_current();
        kcsan_enable_current();
 
@@ -1340,6 +1362,64 @@ static void update_refs(struct kmemleak_object *object)
        }
 }
 
+static void pointer_update_refs(struct kmemleak_object *scanned,
+                        unsigned long pointer, unsigned int objflags)
+{
+       struct kmemleak_object *object;
+       unsigned long untagged_ptr;
+       unsigned long excess_ref;
+
+       untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer);
+       if (objflags & OBJECT_PERCPU) {
+               if (untagged_ptr < min_percpu_addr || untagged_ptr >= max_percpu_addr)
+                       return;
+       } else {
+               if (untagged_ptr < min_addr || untagged_ptr >= max_addr)
+                       return;
+       }
+
+       /*
+        * No need for get_object() here since we hold kmemleak_lock.
+        * object->use_count cannot be dropped to 0 while the object
+        * is still present in object_tree_root and object_list
+        * (with updates protected by kmemleak_lock).
+        */
+       object = __lookup_object(pointer, 1, objflags);
+       if (!object)
+               return;
+       if (object == scanned)
+               /* self referenced, ignore */
+               return;
+
+       /*
+        * Avoid the lockdep recursive warning on object->lock being
+        * previously acquired in scan_object(). These locks are
+        * enclosed by scan_mutex.
+        */
+       raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
+       /* only pass surplus references (object already gray) */
+       if (color_gray(object)) {
+               excess_ref = object->excess_ref;
+               /* no need for update_refs() if object already gray */
+       } else {
+               excess_ref = 0;
+               update_refs(object);
+       }
+       raw_spin_unlock(&object->lock);
+
+       if (excess_ref) {
+               object = lookup_object(excess_ref, 0);
+               if (!object)
+                       return;
+               if (object == scanned)
+                       /* circular reference, ignore */
+                       return;
+               raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
+               update_refs(object);
+               raw_spin_unlock(&object->lock);
+       }
+}
+
 /*
  * Memory scanning is a long process and it needs to be interruptible. This
  * function checks whether such interrupt condition occurred.
@@ -1372,13 +1452,10 @@ static void scan_block(void *_start, void *_end,
        unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
        unsigned long *end = _end - (BYTES_PER_POINTER - 1);
        unsigned long flags;
-       unsigned long untagged_ptr;
 
        raw_spin_lock_irqsave(&kmemleak_lock, flags);
        for (ptr = start; ptr < end; ptr++) {
-               struct kmemleak_object *object;
                unsigned long pointer;
-               unsigned long excess_ref;
 
                if (scan_should_stop())
                        break;
@@ -1387,50 +1464,8 @@ static void scan_block(void *_start, void *_end,
                pointer = *(unsigned long *)kasan_reset_tag((void *)ptr);
                kasan_enable_current();
 
-               untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer);
-               if (untagged_ptr < min_addr || untagged_ptr >= max_addr)
-                       continue;
-
-               /*
-                * No need for get_object() here since we hold kmemleak_lock.
-                * object->use_count cannot be dropped to 0 while the object
-                * is still present in object_tree_root and object_list
-                * (with updates protected by kmemleak_lock).
-                */
-               object = lookup_object(pointer, 1);
-               if (!object)
-                       continue;
-               if (object == scanned)
-                       /* self referenced, ignore */
-                       continue;
-
-               /*
-                * Avoid the lockdep recursive warning on object->lock being
-                * previously acquired in scan_object(). These locks are
-                * enclosed by scan_mutex.
-                */
-               raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
-               /* only pass surplus references (object already gray) */
-               if (color_gray(object)) {
-                       excess_ref = object->excess_ref;
-                       /* no need for update_refs() if object already gray */
-               } else {
-                       excess_ref = 0;
-                       update_refs(object);
-               }
-               raw_spin_unlock(&object->lock);
-
-               if (excess_ref) {
-                       object = lookup_object(excess_ref, 0);
-                       if (!object)
-                               continue;
-                       if (object == scanned)
-                               /* circular reference, ignore */
-                               continue;
-                       raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
-                       update_refs(object);
-                       raw_spin_unlock(&object->lock);
-               }
+               pointer_update_refs(scanned, pointer, 0);
+               pointer_update_refs(scanned, pointer, OBJECT_PERCPU);
        }
        raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
 }
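
For contrast with the reproducer in the commit message: because scan_block()
now calls pointer_update_refs() a second time with OBJECT_PERCPU, a raw
__percpu address stored in a tracked object (or in static data) counts as a
reference, so an area like the one below would not be reported. This is only
a hedged sketch; the struct and function names are invented:

  /*
   * Hypothetical counter-example: the percpu area stays referenced, so it
   * is not reported even though min_count is now 1.
   */
  #include <linux/module.h>
  #include <linux/percpu.h>
  #include <linux/slab.h>

  struct pkt_stats {
          u64 __percpu *hits;     /* raw percpu cookie kept in a heap object */
  };

  static struct pkt_stats *stats; /* reachable from module data during scan */

  static int __init pkt_stats_init(void)
  {
          stats = kzalloc(sizeof(*stats), GFP_KERNEL);
          if (!stats)
                  return -ENOMEM;

          stats->hits = alloc_percpu(u64);
          if (!stats->hits) {
                  kfree(stats);
                  stats = NULL;
                  return -ENOMEM;
          }
          /*
           * scan_block() sees 'stats' in the module's data section and
           * 'stats->hits' inside the kzalloc'd object; the OBJECT_PERCPU
           * lookup added by this patch matches the latter against the
           * percpu tree, so neither object is considered unreferenced.
           */
          return 0;
  }

  static void __exit pkt_stats_exit(void)
  {
          free_percpu(stats->hits);
          kfree(stats);
  }

  module_init(pkt_stats_init);
  module_exit(pkt_stats_exit);
  MODULE_LICENSE("GPL");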