drm/i915: Embrace the race in busy-ioctl

author Chris Wilson <chris@chris-wilson.co.uk>

Tue, 16 Aug 2016 08:50:40 +0000 (09:50 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Tue, 16 Aug 2016 09:35:02 +0000 (10:35 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 16 Aug 2016 08:50:40 +0000 (09:50 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 16 Aug 2016 09:35:02 +0000 (10:35 +0100)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index 7e08c774a1aae98dc2049f627e46e9eca42452fc..a8d0f70c22f9a7adc7d81f408a5a69941b745ea3 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3807,49 +3807,68 @@ static __always_inline unsigned int
  __busy_set_if_active(const struct i915_gem_active *active,
                      unsigned int (*flag)(unsigned int id))
  {
-       /* For more discussion about the barriers and locking concerns,
-        * see __i915_gem_active_get_rcu().
-        */
-       do {
-               struct drm_i915_gem_request *request;
-               unsigned int id;
-
-               request = rcu_dereference(active->request);
-               if (!request || i915_gem_request_completed(request))
-                       return 0;
+       struct drm_i915_gem_request *request;
  
-               id = request->engine->exec_id;
+       request = rcu_dereference(active->request);
+       if (!request || i915_gem_request_completed(request))
+               return 0;
  
-               /* Check that the pointer wasn't reassigned and overwritten.
-                *
-                * In __i915_gem_active_get_rcu(), we enforce ordering between
-                * the first rcu pointer dereference (imposing a
-                * read-dependency only on access through the pointer) and
-                * the second lockless access through the memory barrier
-                * following a successful atomic_inc_not_zero(). Here there
-                * is no such barrier, and so we must manually insert an
-                * explicit read barrier to ensure that the following
-                * access occurs after all the loads through the first
-                * pointer.
-                *
-                * It is worth comparing this sequence with
-                * raw_write_seqcount_latch() which operates very similarly.
-                * The challenge here is the visibility of the other CPU
-                * writes to the reallocated request vs the local CPU ordering.
-                * Before the other CPU can overwrite the request, it will
-                * have updated our active->request and gone through a wmb.
-                * During the read here, we want to make sure that the values
-                * we see have not been overwritten as we do so - and we do
-                * that by serialising the second pointer check with the writes
-                * on other other CPUs.
-                *
-                * The corresponding write barrier is part of
-                * rcu_assign_pointer().
-                */
-               smp_rmb();
-               if (request == rcu_access_pointer(active->request))
-                       return flag(id);
-       } while (1);
+       /* This is racy. See __i915_gem_active_get_rcu() for a detailed
+        * discussion of how to handle the race correctly, but for reporting
+        * the busy state we err on the side of potentially reporting the
+        * wrong engine as being busy (but we guarantee that the result
+        * is at least self-consistent).
+        *
+        * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
+        * whilst we are inspecting it, even under the RCU read lock as we are.
+        * This means that there is a small window for the engine and/or the
+        * seqno to have been overwritten. The seqno will always be in the
+        * future compared to the intended, and so we know that if that
+        * seqno is idle (on whatever engine) our request is idle and the
+        * return 0 above is correct.
+        *
+        * The issue is that if the engine is switched, it is just as likely
+        * to report that it is busy (but since the switch happened, we know
+        * the request should be idle). So there is a small chance that a busy
+        * result is actually the wrong engine.
+        *
+        * So why don't we care?
+        *
+        * For starters, the busy ioctl is a heuristic that is by definition
+        * racy. Even with perfect serialisation in the driver, the hardware
+        * state is constantly advancing - the state we report to the user
+        * is stale.
+        *
+        * The critical information for the busy-ioctl is whether the object
+        * is idle as userspace relies on that to detect whether its next
+        * access will stall, or if it has missed submitting commands to
+        * the hardware allowing the GPU to stall. We never generate a
+        * false-positive for idleness, thus busy-ioctl is reliable at the
+        * most fundamental level, and we maintain the guarantee that a
+        * busy object left to itself will eventually become idle (and stay
+        * idle!).
+        *
+        * We allow ourselves the leeway of potentially misreporting the busy
+        * state because that is an optimisation heuristic that is constantly
+        * in flux. Being quickly able to detect the busy/idle state is much
+        * more important than accurate logging of exactly which engines were
+        * busy.
+        *
+        * For accuracy in reporting the engine, we could use
+        *
+        *      result = 0;
+        *      request = __i915_gem_active_get_rcu(active);
+        *      if (request) {
+        *              if (!i915_gem_request_completed(request))
+        *                      result = flag(request->engine->exec_id);
+        *              i915_gem_request_put(request);
+        *      }
+        *
+        * but that still remains susceptible to both hardware and userspace
+        * races. So we accept making the result of that race slightly worse,
+        * given the rarity of the race and its low impact on the result.
+        */
+       return flag(READ_ONCE(request->engine->exec_id));
  }
  
  static __always_inline unsigned int
@@ -3897,11 +3916,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
                  * retired and freed. We take a local copy of the pointer,
                  * but before we add its engine into the busy set, the other
                  * thread reallocates it and assigns it to a task on another
-                * engine with a fresh and incomplete seqno.
-                *
-                * So after we lookup the engine's id, we double check that
-                * the active request is the same and only then do we add it
-                * into the busy set.
+                * engine with a fresh and incomplete seqno. Guarding against
+                * that requires careful serialisation and reference counting,
+                * i.e. using __i915_gem_active_get_rcu(). We don't;
+                * instead we accept that if the result is busy, the report of
+                * which engines are busy is not completely reliable - we only
+                * guarantee that the object was busy.
                  */
                 rcu_read_lock();
  
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h

index 452629de7a577c5547c622d6098f428959bf3917..5501fe83ed92490613fbb25091b217fb0efa8997 100644 (file)
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -855,7 +855,16 @@ struct drm_i915_gem_busy {
          * having flushed any pending activity), and a non-zero return that
          * the object is still in-flight on the GPU. (The GPU has not yet
          * signaled completion for all pending requests that reference the
-        * object.)
+        * object.) An object is guaranteed to become idle eventually (so
+        * long as no new GPU commands are executed upon it). Due to the
+        * asynchronous nature of the hardware, an object reported
+        * as busy may become idle before the ioctl is completed.
+        *
+        * Furthermore, if the object is busy, which engine is busy is only
+        * provided as a guide. There are race conditions which prevent the
+        * report of which engines are busy from being always accurate.
+        * However, the idle result carries no such caveat: if the object is
+        * reported as idle, that result (all engines idle) is accurate.
          *
          * The returned dword is split into two fields to indicate both
          * the engines on which the object is being read, and the
@@ -878,6 +887,11 @@ struct drm_i915_gem_busy {
          * execution engines, e.g. multiple media engines, which are
          * mapped to the same identifier in the EXECBUFFER2 ioctl and
          * so are not separately reported for busyness.
+        *
+        * Caveat emptor:
+        * Only the boolean result of this query is reliable; that is whether
+        * the object is idle or busy. The report of which engines are busy
+        * should be only used as a heuristic.
          */
         __u32 busy;
  };
author	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 16 Aug 2016 08:50:40 +0000 (09:50 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 16 Aug 2016 09:35:02 +0000 (10:35 +0100)
drivers/gpu/drm/i915/i915_gem.c		patch \| blob \| history
include/uapi/drm/i915_drm.h		patch \| blob \| history