#include "vc4_drv.h"
 #include "uapi/drm/vc4_drm.h"
 
+/* Human-readable names for the kernel-defined BO types.  Indexed by
+ * enum vc4_kernel_bo_type, so the order here must match that enum
+ * (enforced by the BUILD_BUG_ON in vc4_bo_cache_init()).
+ */
+static const char * const bo_type_names[] = {
+       "kernel",
+       "V3D",
+       "V3D shader",
+       "dumb",
+       "binner",
+       "RCL",
+       "BCL",
+       "kernel BO cache",
+};
+
+/* Returns true if @label is a userspace-assigned label slot.  User
+ * labels occupy the bo_labels[] indices at and past the fixed kernel
+ * types (VC4_BO_TYPE_COUNT).
+ */
+static bool is_user_label(int label)
+{
+       return label >= VC4_BO_TYPE_COUNT;
+}
+
 static void vc4_bo_stats_dump(struct vc4_dev *vc4)
 {
-       DRM_INFO("num bos allocated: %d\n",
-                vc4->bo_stats.num_allocated);
-       DRM_INFO("size bos allocated: %dkb\n",
-                vc4->bo_stats.size_allocated / 1024);
-       DRM_INFO("num bos used: %d\n",
-                vc4->bo_stats.num_allocated - vc4->bo_stats.num_cached);
-       DRM_INFO("size bos used: %dkb\n",
-                (vc4->bo_stats.size_allocated -
-                 vc4->bo_stats.size_cached) / 1024);
-       DRM_INFO("num bos cached: %d\n",
-                vc4->bo_stats.num_cached);
-       DRM_INFO("size bos cached: %dkb\n",
-                vc4->bo_stats.size_cached / 1024);
+       int i;
+
+       /* Print one line per label that still has BOs charged to it,
+        * skipping empty slots.
+        * NOTE(review): reads bo_labels[] without taking bo_lock —
+        * confirm callers hold it or run when no allocation races are
+        * possible.
+        */
+       for (i = 0; i < vc4->num_labels; i++) {
+               if (!vc4->bo_labels[i].num_allocated)
+                       continue;
+
+               DRM_INFO("%30s: %6dkb BOs (%d)\n",
+                        vc4->bo_labels[i].name,
+                        vc4->bo_labels[i].size_allocated / 1024,
+                        vc4->bo_labels[i].num_allocated);
+       }
 }
 
 #ifdef CONFIG_DEBUG_FS
        struct drm_info_node *node = (struct drm_info_node *)m->private;
        struct drm_device *dev = node->minor->dev;
        struct vc4_dev *vc4 = to_vc4_dev(dev);
-       struct vc4_bo_stats stats;
+       int i;
 
-       /* Take a snapshot of the current stats with the lock held. */
        mutex_lock(&vc4->bo_lock);
-       stats = vc4->bo_stats;
+       for (i = 0; i < vc4->num_labels; i++) {
+               if (!vc4->bo_labels[i].num_allocated)
+                       continue;
+
+               seq_printf(m, "%30s: %6dkb BOs (%d)\n",
+                          vc4->bo_labels[i].name,
+                          vc4->bo_labels[i].size_allocated / 1024,
+                          vc4->bo_labels[i].num_allocated);
+       }
        mutex_unlock(&vc4->bo_lock);
 
-       seq_printf(m, "num bos allocated: %d\n",
-                  stats.num_allocated);
-       seq_printf(m, "size bos allocated: %dkb\n",
-                  stats.size_allocated / 1024);
-       seq_printf(m, "num bos used: %d\n",
-                  stats.num_allocated - stats.num_cached);
-       seq_printf(m, "size bos used: %dkb\n",
-                  (stats.size_allocated - stats.size_cached) / 1024);
-       seq_printf(m, "num bos cached: %d\n",
-                  stats.num_cached);
-       seq_printf(m, "size bos cached: %dkb\n",
-                  stats.size_cached / 1024);
-
        return 0;
 }
 #endif
 
+/* Takes ownership of *name and returns the appropriate slot for it in
+ * the bo_labels[] array, extending it as necessary.
+ *
+ * This is inefficient and could use a hash table instead of walking
+ * an array and strcmp()ing.  However, the assumption is that user
+ * labeling will be infrequent (scanout buffers and other long-lived
+ * objects, or debug driver builds), so we can live with it for now.
+ *
+ * Returns the label slot index on success, or -1 on allocation
+ * failure.  In every path, @name is either stored or freed — the
+ * caller never frees it.
+ */
+static int vc4_get_user_label(struct vc4_dev *vc4, const char *name)
+{
+       int i;
+       int free_slot = -1;
+
+       for (i = 0; i < vc4->num_labels; i++) {
+               if (!vc4->bo_labels[i].name) {
+                       free_slot = i;
+               } else if (strcmp(vc4->bo_labels[i].name, name) == 0) {
+                       /* The name already has a slot; drop our copy. */
+                       kfree(name);
+                       return i;
+               }
+       }
+
+       if (free_slot != -1) {
+               WARN_ON(vc4->bo_labels[free_slot].num_allocated != 0);
+               vc4->bo_labels[free_slot].name = name;
+               return free_slot;
+       } else {
+               u32 new_label_count = vc4->num_labels + 1;
+               struct vc4_label *new_labels =
+                       krealloc(vc4->bo_labels,
+                                new_label_count * sizeof(*new_labels),
+                                GFP_KERNEL);
+
+               if (!new_labels) {
+                       /* We still own @name on failure; don't leak it. */
+                       kfree(name);
+                       return -1;
+               }
+
+               free_slot = vc4->num_labels;
+               vc4->bo_labels = new_labels;
+               vc4->num_labels = new_label_count;
+
+               vc4->bo_labels[free_slot].name = name;
+               vc4->bo_labels[free_slot].num_allocated = 0;
+               vc4->bo_labels[free_slot].size_allocated = 0;
+
+               return free_slot;
+       }
+}
+
+/* Moves a BO's allocation stats from its current label slot to @label.
+ * Must be called with bo_lock held (asserted below).  A @label of -1
+ * means the BO is being destroyed: its stats are removed and no new
+ * label is charged.
+ */
+static void vc4_bo_set_label(struct drm_gem_object *gem_obj, int label)
+{
+       struct vc4_bo *bo = to_vc4_bo(gem_obj);
+       struct vc4_dev *vc4 = to_vc4_dev(gem_obj->dev);
+
+       lockdep_assert_held(&vc4->bo_lock);
+
+       if (label != -1) {
+               vc4->bo_labels[label].num_allocated++;
+               vc4->bo_labels[label].size_allocated += gem_obj->size;
+       }
+
+       vc4->bo_labels[bo->label].num_allocated--;
+       vc4->bo_labels[bo->label].size_allocated -= gem_obj->size;
+
+       if (vc4->bo_labels[bo->label].num_allocated == 0 &&
+           is_user_label(bo->label)) {
+               /* Free user BO label slots on last unreference.
+                * Slots are just where we track the stats for a given
+                * name, and once a name is unused we can reuse that
+                * slot.
+                */
+               kfree(vc4->bo_labels[bo->label].name);
+               vc4->bo_labels[bo->label].name = NULL;
+       }
+
+       bo->label = label;
+}
+
 static uint32_t bo_page_index(size_t size)
 {
        return (size / PAGE_SIZE) - 1;
 static void vc4_bo_destroy(struct vc4_bo *bo)
 {
        struct drm_gem_object *obj = &bo->base.base;
-       struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
+
+       vc4_bo_set_label(obj, -1);
 
        if (bo->validated_shader) {
                kfree(bo->validated_shader->texture_samples);
                bo->validated_shader = NULL;
        }
 
-       vc4->bo_stats.num_allocated--;
-       vc4->bo_stats.size_allocated -= obj->size;
-
        reservation_object_fini(&bo->_resv);
 
        drm_gem_cma_free_object(obj);
 /* Must be called with bo_lock held. */
 static void vc4_bo_remove_from_cache(struct vc4_bo *bo)
 {
-       struct drm_gem_object *obj = &bo->base.base;
-       struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
-
-       vc4->bo_stats.num_cached--;
-       vc4->bo_stats.size_cached -= obj->size;
-
+       /* Cache stats are now tracked per-label (cached BOs carry
+        * VC4_BO_TYPE_KERNEL_CACHE), so only the list bookkeeping
+        * remains here.
+        */
        list_del(&bo->unref_head);
        list_del(&bo->size_head);
 }
 }
 
 static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev,
-                                           uint32_t size)
+                                           uint32_t size,
+                                           enum vc4_kernel_bo_type type)
 {
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        uint32_t page_index = bo_page_index(size);
        kref_init(&bo->base.base.refcount);
 
 out:
+       if (bo)
+               vc4_bo_set_label(&bo->base.base, type);
        mutex_unlock(&vc4->bo_lock);
        return bo;
 }
                return ERR_PTR(-ENOMEM);
 
        mutex_lock(&vc4->bo_lock);
-       vc4->bo_stats.num_allocated++;
-       vc4->bo_stats.size_allocated += size;
+       bo->label = VC4_BO_TYPE_KERNEL;
+       vc4->bo_labels[VC4_BO_TYPE_KERNEL].num_allocated++;
+       vc4->bo_labels[VC4_BO_TYPE_KERNEL].size_allocated += size;
        mutex_unlock(&vc4->bo_lock);
        bo->resv = &bo->_resv;
        reservation_object_init(bo->resv);
 }
 
 struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
-                            bool allow_unzeroed)
+                            bool allow_unzeroed, enum vc4_kernel_bo_type type)
 {
        size_t size = roundup(unaligned_size, PAGE_SIZE);
        struct vc4_dev *vc4 = to_vc4_dev(dev);
                return ERR_PTR(-EINVAL);
 
        /* First, try to get a vc4_bo from the kernel BO cache. */
-       bo = vc4_bo_get_from_cache(dev, size);
+       bo = vc4_bo_get_from_cache(dev, size, type);
        if (bo) {
                if (!allow_unzeroed)
                        memset(bo->base.vaddr, 0, bo->base.base.size);
                        return ERR_PTR(-ENOMEM);
                }
        }
-       return to_vc4_bo(&cma_obj->base);
+       bo = to_vc4_bo(&cma_obj->base);
+
+       mutex_lock(&vc4->bo_lock);
+       vc4_bo_set_label(&cma_obj->base, type);
+       mutex_unlock(&vc4->bo_lock);
+
+       return bo;
 }
 
 int vc4_dumb_create(struct drm_file *file_priv,
        if (args->size < args->pitch * args->height)
                args->size = args->pitch * args->height;
 
-       bo = vc4_bo_create(dev, args->size, false);
+       bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_DUMB);
        if (IS_ERR(bo))
                return PTR_ERR(bo);
 
        list_add(&bo->size_head, cache_list);
        list_add(&bo->unref_head, &vc4->bo_cache.time_list);
 
-       vc4->bo_stats.num_cached++;
-       vc4->bo_stats.size_cached += gem_bo->size;
+       vc4_bo_set_label(&bo->base.base, VC4_BO_TYPE_KERNEL_CACHE);
 
        vc4_bo_cache_free_old(dev);
 
         * We can't allocate from the BO cache, because the BOs don't
         * get zeroed, and that might leak data between users.
         */
-       bo = vc4_bo_create(dev, args->size, false);
+       bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_V3D);
        if (IS_ERR(bo))
                return PTR_ERR(bo);
 
                return -EINVAL;
        }
 
-       bo = vc4_bo_create(dev, args->size, true);
+       bo = vc4_bo_create(dev, args->size, true, VC4_BO_TYPE_V3D_SHADER);
        if (IS_ERR(bo))
                return PTR_ERR(bo);
 
        return 0;
 }
 
-void vc4_bo_cache_init(struct drm_device *dev)
+/* One-time setup of the BO cache and the label-stats table.  Returns
+ * 0 on success or -ENOMEM if the initial bo_labels[] array can't be
+ * allocated.
+ */
+int vc4_bo_cache_init(struct drm_device *dev)
 {
        struct vc4_dev *vc4 = to_vc4_dev(dev);
+       int i;
+
+       /* Create the initial set of BO labels that the kernel will
+        * use.  This lets us avoid a bunch of string reallocation in
+        * the kernel's draw and BO allocation paths.
+        */
+       vc4->bo_labels = kcalloc(VC4_BO_TYPE_COUNT, sizeof(*vc4->bo_labels),
+                                GFP_KERNEL);
+       if (!vc4->bo_labels)
+               return -ENOMEM;
+       vc4->num_labels = VC4_BO_TYPE_COUNT;
+
+       /* bo_type_names[] must stay in sync with enum vc4_kernel_bo_type. */
+       BUILD_BUG_ON(ARRAY_SIZE(bo_type_names) != VC4_BO_TYPE_COUNT);
+       for (i = 0; i < VC4_BO_TYPE_COUNT; i++)
+               vc4->bo_labels[i].name = bo_type_names[i];
 
        mutex_init(&vc4->bo_lock);
 
        setup_timer(&vc4->bo_cache.time_timer,
                    vc4_bo_cache_time_timer,
                    (unsigned long)dev);
+
+       return 0;
 }
 
 void vc4_bo_cache_destroy(struct drm_device *dev)
 {
        struct vc4_dev *vc4 = to_vc4_dev(dev);
+       int i;
 
        del_timer(&vc4->bo_cache.time_timer);
        cancel_work_sync(&vc4->bo_cache.time_work);
 
        vc4_bo_cache_purge(dev);
 
-       if (vc4->bo_stats.num_allocated) {
-               DRM_ERROR("Destroying BO cache while BOs still allocated:\n");
-               vc4_bo_stats_dump(vc4);
+       /* Complain about any leaked BOs, then release the label
+        * bookkeeping.  Only user-label names are heap-allocated
+        * (via strndup_user()); kernel names point at the static
+        * bo_type_names[] table and must not be freed.
+        */
+       for (i = 0; i < vc4->num_labels; i++) {
+               if (vc4->bo_labels[i].num_allocated) {
+                       DRM_ERROR("Destroying BO cache with %d %s "
+                                 "BOs still allocated\n",
+                                 vc4->bo_labels[i].num_allocated,
+                                 vc4->bo_labels[i].name);
+               }
+
+               if (is_user_label(i))
+                       kfree(vc4->bo_labels[i].name);
        }
+       kfree(vc4->bo_labels);
+}
+
+/* Ioctl: attach a user-provided name to a BO for allocation-stats
+ * bookkeeping (debugfs / vc4_bo_stats_dump()).
+ *
+ * Copies the string in from userspace, resolves (or creates) a label
+ * slot for it, and moves the BO's stats accounting to that label.
+ * Returns 0 on success, -EINVAL for an empty name, -ENOENT for a bad
+ * handle, or -ENOMEM if the label table can't be grown.
+ */
+int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
+                      struct drm_file *file_priv)
+{
+       struct vc4_dev *vc4 = to_vc4_dev(dev);
+       struct drm_vc4_label_bo *args = data;
+       char *name;
+       struct drm_gem_object *gem_obj;
+       int ret = 0, label;
+
+       if (!args->len)
+               return -EINVAL;
+
+       name = strndup_user(u64_to_user_ptr(args->name), args->len + 1);
+       if (IS_ERR(name))
+               return PTR_ERR(name);
+
+       gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+       if (!gem_obj) {
+               /* Trivially userspace-triggerable (any bogus handle),
+                * so log at debug level rather than DRM_ERROR to keep
+                * a broken client from spamming the kernel log.
+                */
+               DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+               kfree(name);
+               return -ENOENT;
+       }
+
+       mutex_lock(&vc4->bo_lock);
+       /* vc4_get_user_label() takes ownership of @name. */
+       label = vc4_get_user_label(vc4, name);
+       if (label != -1)
+               vc4_bo_set_label(gem_obj, label);
+       else
+               ret = -ENOMEM;
+       mutex_unlock(&vc4->bo_lock);
+
+       drm_gem_object_unreference_unlocked(gem_obj);
+
+       return ret;
 }
 
 #include <drm/drm_encoder.h>
 #include <drm/drm_gem_cma_helper.h>
 
+/* Don't forget to update vc4_bo.c: bo_type_names[] when adding to
+ * this.
+ */
+enum vc4_kernel_bo_type {
+       /* Any kernel allocation (gem_create_object hook) before it
+        * gets another type set.
+        */
+       VC4_BO_TYPE_KERNEL,
+       VC4_BO_TYPE_V3D,
+       VC4_BO_TYPE_V3D_SHADER,
+       VC4_BO_TYPE_DUMB,
+       VC4_BO_TYPE_BIN,
+       VC4_BO_TYPE_RCL,
+       VC4_BO_TYPE_BCL,
+       /* BOs parked in the kernel's BO cache awaiting reuse. */
+       VC4_BO_TYPE_KERNEL_CACHE,
+       /* Number of kernel types; user label slots start at this value
+        * (see is_user_label()).
+        */
+       VC4_BO_TYPE_COUNT
+};
+
 struct vc4_dev {
        struct drm_device *dev;
 
                struct timer_list time_timer;
        } bo_cache;
 
-       struct vc4_bo_stats {
+       u32 num_labels;
+       struct vc4_label {
+               const char *name;
                u32 num_allocated;
                u32 size_allocated;
-               u32 num_cached;
-               u32 size_cached;
-       } bo_stats;
+       } *bo_labels;
 
-       /* Protects bo_cache and the BO stats. */
+       /* Protects bo_cache and bo_labels. */
        struct mutex bo_lock;
 
        uint64_t dma_fence_context;
        /* normally (resv == &_resv) except for imported bo's */
        struct reservation_object *resv;
        struct reservation_object _resv;
+
+       /* One of enum vc4_kernel_bo_type, or VC4_BO_TYPE_COUNT + i
+        * for user-allocated labels.
+        */
+       int label;
 };
 
 static inline struct vc4_bo *
 struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size);
 void vc4_free_object(struct drm_gem_object *gem_obj);
 struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size,
-                            bool from_cache);
+                            bool from_cache, enum vc4_kernel_bo_type type);
 int vc4_dumb_create(struct drm_file *file_priv,
                    struct drm_device *dev,
                    struct drm_mode_create_dumb *args);
                         struct drm_file *file_priv);
 int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
                             struct drm_file *file_priv);
+int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
+                      struct drm_file *file_priv);
 int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
 struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj);
 int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
                                                 struct dma_buf_attachment *attach,
                                                 struct sg_table *sgt);
 void *vc4_prime_vmap(struct drm_gem_object *obj);
-void vc4_bo_cache_init(struct drm_device *dev);
+int vc4_bo_cache_init(struct drm_device *dev);
 void vc4_bo_cache_destroy(struct drm_device *dev);
 int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);