const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
 
 typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
-                                       unsigned long len);
+                                       unsigned long off, unsigned long len); /* NOTE(review): off looks like a start offset within src (bpf_skb_copy hands it to skb_header_pointer) - confirm */
 
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
                     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
 
        return rb->aux_nr_pages << PAGE_SHIFT;
 }
 
-#define __DEFINE_OUTPUT_COPY_BODY(memcpy_func)                         \
+#define __DEFINE_OUTPUT_COPY_BODY(advance_buf, memcpy_func, ...)       \
 {                                                                      \
        unsigned long size, written;                                    \
                                                                        \
        do {                                                            \
                size    = min(handle->size, len);                       \
-               written = memcpy_func(handle->addr, buf, size);         \
+               written = memcpy_func(__VA_ARGS__);                     \
                written = size - written;                               \
                                                                        \
                len -= written;                                         \
                handle->addr += written;                                \
-               buf += written;                                         \
+               if (advance_buf)                                        \
+                       buf += written;                                 \
                handle->size -= written;                                \
                if (!handle->size) {                                    \
                        struct ring_buffer *rb = handle->rb;            \
 static inline unsigned long                                            \
 func_name(struct perf_output_handle *handle,                           \
          const void *buf, unsigned long len)                           \
-__DEFINE_OUTPUT_COPY_BODY(memcpy_func)
+__DEFINE_OUTPUT_COPY_BODY(true, memcpy_func, handle->addr, buf, size)
 
 static inline unsigned long
 __output_custom(struct perf_output_handle *handle, perf_copy_f copy_func,
                 const void *buf, unsigned long len)
-__DEFINE_OUTPUT_COPY_BODY(copy_func)
+{
+       unsigned long orig_len = len; /* snapshot of the requested length; the copy body decrements len per chunk */
+       __DEFINE_OUTPUT_COPY_BODY(false, copy_func, handle->addr, buf,
+                                 orig_len - len, size) /* advance_buf is false, so buf stays fixed; orig_len - len (bytes already written) goes to copy_func as its third argument */
+}
 
 static inline unsigned long
 memcpy_common(void *dst, const void *src, unsigned long n)
 
 }
 
 static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
-                                 unsigned long len)
+                                 unsigned long off, unsigned long len)
 {
-       void *ptr = skb_header_pointer(skb, 0, len, dst_buff);
+       void *ptr = skb_header_pointer(skb, off, len, dst_buff);
 
        if (unlikely(!ptr))
                return len;