}
 EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
 
+static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+                                     enum dma_data_direction direction)
+{
+       bool write = (direction == DMA_BIDIRECTIONAL ||
+                     direction == DMA_TO_DEVICE);
+       struct reservation_object *resv = dmabuf->resv;
+       long ret;
+
+       /* Wait on any implicit rendering fences */
+       ret = reservation_object_wait_timeout_rcu(resv, write, true,
+                                                 MAX_SCHEDULE_TIMEOUT);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
 
 /**
  * dma_buf_begin_cpu_access - Must be called before accessing a dma_buf from the
        if (dmabuf->ops->begin_cpu_access)
                ret = dmabuf->ops->begin_cpu_access(dmabuf, direction);
 
+       /* Ensure that all fences are waited upon - but we first allow
+        * the native handler the chance to do so more efficiently if it
+        * chooses. A double invocation here will be reasonably cheap no-op.
+        */
+       if (ret == 0)
+               ret = __dma_buf_begin_cpu_access(dmabuf, direction);
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access);