#define _TRACE_PERCPU_H
 
 #include <linux/tracepoint.h>
+#include <trace/events/mmflags.h>
 
 TRACE_EVENT(percpu_alloc_percpu,
 
-       TP_PROTO(bool reserved, bool is_atomic, size_t size,
-                size_t align, void *base_addr, int off, void __percpu *ptr),
+       TP_PROTO(unsigned long call_site,
+                bool reserved, bool is_atomic, size_t size,
+                size_t align, void *base_addr, int off,
+                void __percpu *ptr, size_t bytes_alloc, gfp_t gfp_flags),
 
-       TP_ARGS(reserved, is_atomic, size, align, base_addr, off, ptr),
+       TP_ARGS(call_site, reserved, is_atomic, size, align, base_addr, off,
+               ptr, bytes_alloc, gfp_flags),
 
        TP_STRUCT__entry(
+               __field(        unsigned long,          call_site       )
                __field(        bool,                   reserved        )
                __field(        bool,                   is_atomic       )
                __field(        size_t,                 size            )
                __field(        void *,                 base_addr       )
                __field(        int,                    off             )
                __field(        void __percpu *,        ptr             )
+               __field(        size_t,                 bytes_alloc     )
+               __field(        gfp_t,                  gfp_flags       )
        ),
-
        TP_fast_assign(
+               __entry->call_site      = call_site;
                __entry->reserved       = reserved;
                __entry->is_atomic      = is_atomic;
                __entry->size           = size;
                __entry->base_addr      = base_addr;
                __entry->off            = off;
                __entry->ptr            = ptr;
+               __entry->bytes_alloc    = bytes_alloc;
+               __entry->gfp_flags      = gfp_flags;
        ),
 
-       TP_printk("reserved=%d is_atomic=%d size=%zu align=%zu base_addr=%p off=%d ptr=%p",
+       TP_printk("call_site=%pS reserved=%d is_atomic=%d size=%zu align=%zu base_addr=%p off=%d ptr=%p bytes_alloc=%zu gfp_flags=%s",
+                 (void *)__entry->call_site,
                  __entry->reserved, __entry->is_atomic,
                  __entry->size, __entry->align,
-                 __entry->base_addr, __entry->off, __entry->ptr)
+                 __entry->base_addr, __entry->off, __entry->ptr,
+                 __entry->bytes_alloc, show_gfp_flags(__entry->gfp_flags))
 );
 
 TRACE_EVENT(percpu_free_percpu,
 
        return pcpu_nr_pages_to_map_bits(chunk->nr_pages);
 }
 
-#ifdef CONFIG_MEMCG_KMEM
 /**
  * pcpu_obj_full_size - helper to calculate size of each accounted object
  * @size: size of area to allocate in bytes
  */
 static inline size_t pcpu_obj_full_size(size_t size)
 {
-       size_t extra_size;
+       size_t extra_size = 0;
 
-       extra_size = size / PCPU_MIN_ALLOC_SIZE * sizeof(struct obj_cgroup *);
+#ifdef CONFIG_MEMCG_KMEM
+       extra_size += size / PCPU_MIN_ALLOC_SIZE * sizeof(struct obj_cgroup *);
+#endif
 
        return size * num_possible_cpus() + extra_size;
 }
-#endif /* CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_PERCPU_STATS
 
 
        ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
        kmemleak_alloc_percpu(ptr, size, gfp);
 
-       trace_percpu_alloc_percpu(reserved, is_atomic, size, align,
-                       chunk->base_addr, off, ptr);
+       trace_percpu_alloc_percpu(_RET_IP_, reserved, is_atomic, size, align,
+                                 chunk->base_addr, off, ptr,
+                                 pcpu_obj_full_size(size), gfp);
 
        pcpu_memcg_post_alloc_hook(objcg, chunk, off, size);