static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
                               u32 offset, u32 words)
 {
+       struct device *dev = cdma_to_channel(cdma)->dev;
        void *mem = NULL;
 
        if (host1x_debug_trace_cmdbuf)
                 * of how much you can output to ftrace at once.
                 */
                for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
-                       trace_host1x_cdma_push_gather(
-                               dev_name(cdma_to_channel(cdma)->dev),
-                               (u32)bo, min(words - i, TRACE_MAX_LENGTH),
-                               offset + i * sizeof(u32), mem);
+                       u32 num_words = min(words - i, TRACE_MAX_LENGTH);
+                       offset += i * sizeof(u32);
+
+                       trace_host1x_cdma_push_gather(dev_name(dev), bo,
+                                                     num_words, offset,
+                                                     mem);
                }
+
                host1x_bo_munmap(bo, mem);
        }
 }
 
 #include <linux/ktime.h>
 #include <linux/tracepoint.h>
 
+struct host1x_bo;
+
 DECLARE_EVENT_CLASS(host1x,
        TP_PROTO(const char *name),
        TP_ARGS(name),
 );
 
 TRACE_EVENT(host1x_cdma_push_gather,
-       TP_PROTO(const char *name, u32 mem_id,
+       TP_PROTO(const char *name, struct host1x_bo *bo,
                        u32 words, u32 offset, void *cmdbuf),
 
-       TP_ARGS(name, mem_id, words, offset, cmdbuf),
+       TP_ARGS(name, bo, words, offset, cmdbuf),
 
        TP_STRUCT__entry(
                __field(const char *, name)
-               __field(u32, mem_id)
+               __field(struct host1x_bo *, bo)
                __field(u32, words)
                __field(u32, offset)
                __field(bool, cmdbuf)
                }
                __entry->cmdbuf = cmdbuf;
                __entry->name = name;
-               __entry->mem_id = mem_id;
+               __entry->bo = bo;
                __entry->words = words;
                __entry->offset = offset;
        ),
 
-       TP_printk("name=%s, mem_id=%08x, words=%u, offset=%d, contents=[%s]",
-         __entry->name, __entry->mem_id,
+       TP_printk("name=%s, bo=%p, words=%u, offset=%d, contents=[%s]",
+         __entry->name, __entry->bo,
          __entry->words, __entry->offset,
          __print_hex(__get_dynamic_array(cmdbuf),
                  __entry->cmdbuf ? __entry->words * 4 : 0))
 );
 
 TRACE_EVENT(host1x_syncpt_wait_check,
-       TP_PROTO(void *mem_id, u32 offset, u32 syncpt_id, u32 thresh, u32 min),
+       TP_PROTO(struct host1x_bo *bo, u32 offset, u32 syncpt_id, u32 thresh,
+                u32 min),
 
-       TP_ARGS(mem_id, offset, syncpt_id, thresh, min),
+       TP_ARGS(bo, offset, syncpt_id, thresh, min),
 
        TP_STRUCT__entry(
-               __field(void *, mem_id)
+               __field(struct host1x_bo *, bo)
                __field(u32, offset)
                __field(u32, syncpt_id)
                __field(u32, thresh)
        ),
 
        TP_fast_assign(
-               __entry->mem_id = mem_id;
+               __entry->bo = bo;
                __entry->offset = offset;
                __entry->syncpt_id = syncpt_id;
                __entry->thresh = thresh;
                __entry->min = min;
        ),
 
-       TP_printk("mem_id=%p, offset=%05x, id=%d, thresh=%d, current=%d",
-               __entry->mem_id, __entry->offset,
+       TP_printk("bo=%p, offset=%05x, id=%d, thresh=%d, current=%d",
+               __entry->bo, __entry->offset,
                __entry->syncpt_id, __entry->thresh,
                __entry->min)
 );