} global;
 
+/* Per-timeline tracking node, linked into an i915_active's rbtree. */
 struct active_node {
+       struct rb_node node; /* must stay first, see fetch_node() */
        struct i915_active_fence base;
        struct i915_active *ref;
-       struct rb_node node;
        u64 timeline;
 };
 
+/*
+ * Load an rb_node pointer exactly once and convert it to the active_node
+ * that embeds it. As node is the first member of struct active_node, a
+ * NULL rb_node pointer converts to a NULL active_node pointer.
+ */
+#define fetch_node(x) rb_entry(READ_ONCE(x), struct active_node, node)
+
 static inline struct active_node *
 node_from_active(struct i915_active_fence *active)
 {
                active_retire(container_of(cb, struct i915_active, excl.cb));
 }
 
-static struct i915_active_fence *
-active_instance(struct i915_active *ref, struct intel_timeline *tl)
+/*
+ * Find the node tracking timeline @idx in @ref.
+ *
+ * The most recently used node (ref->cache) is tried first; otherwise the
+ * rbtree is walked using READ_ONCE() loads of the child pointers. No lock
+ * is taken here, so if the tree is rotated concurrently the walk may miss
+ * a node that does exist. A node found by the walk is stored in ref->cache.
+ *
+ * Returns the matching node, or NULL if the walk did not find one.
+ */
+static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
 {
-       struct active_node *node, *prealloc;
-       struct rb_node **p, *parent;
-       u64 idx = tl->fence_context;
+       struct active_node *it;
 
        /*
         * We track the most recently used timeline to skip a rbtree search
         * after the previous activity has been retired, or if it matches the
         * current timeline.
         */
-       node = READ_ONCE(ref->cache);
-       if (node && node->timeline == idx)
+       it = READ_ONCE(ref->cache);
+       if (it && it->timeline == idx)
+               return it;
+
+       /* fetch_node() only maps NULL to NULL while node is at offset 0 */
+       BUILD_BUG_ON(offsetof(typeof(*it), node));
+
+       /* While active, the tree can only be built; not destroyed */
+       GEM_BUG_ON(i915_active_is_idle(ref));
+
+       /* Ordinary binary search keyed on timeline; it == NULL ends the walk */
+       it = fetch_node(ref->tree.rb_node);
+       while (it) {
+               if (it->timeline < idx) {
+                       it = fetch_node(it->node.rb_right);
+               } else if (it->timeline > idx) {
+                       it = fetch_node(it->node.rb_left);
+               } else {
+                       WRITE_ONCE(ref->cache, it);
+                       break;
+               }
+       }
+
+       /* NB: If the tree rotated beneath us, we may miss our target. */
+       return it;
+}
+
+static struct i915_active_fence *
+active_instance(struct i915_active *ref, u64 idx)
+{
+       struct active_node *node, *prealloc;
+       struct rb_node **p, *parent;
+
+       node = __active_lookup(ref, idx);
+       if (likely(node))
                return &node->base;
 
        /* Preallocate a replacement, just in case */
        rb_insert_color(&node->node, &ref->tree);
 
 out:
-       ref->cache = node;
+       WRITE_ONCE(ref->cache, node);
        spin_unlock_irq(&ref->tree_lock);
 
-       BUILD_BUG_ON(offsetof(typeof(*node), base));
        return &node->base;
 }
 
        return ____active_del_barrier(ref, node, barrier_to_engine(node));
 }
 
-int i915_active_ref(struct i915_active *ref,
-                   struct intel_timeline *tl,
-                   struct dma_fence *fence)
+/*
+ * If @active is a proto-node used by our idle barrier, remove it with
+ * __active_del_barrier() so that the caller can reuse the slot.
+ *
+ * Returns true when @active was such a proto-node, false otherwise.
+ */
+static bool
+replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
+{
+       const bool proto = is_barrier(active);
+
+       /*
+        * This request is on the kernel_context timeline, and so
+        * we can use it to substitute for the pending idle-barrier
+        * request that we want to emit on the kernel_context.
+        */
+       if (proto)
+               __active_del_barrier(ref, node_from_active(active));
+
+       return proto;
+}
+
+/*
+ * Track @fence in the slot of @ref that belongs to timeline @idx.
+ *
+ * @ref is acquired for the duration of the call so that the slot can be
+ * looked up or allocated, and is released again before returning.
+ *
+ * Returns 0 on success, the error from i915_active_acquire(), or -ENOMEM
+ * if no slot could be obtained for @idx.
+ */
+int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
 {
        struct i915_active_fence *active;
        int err;
 
-       lockdep_assert_held(&tl->mutex);
-
        /* Prevent reaping in case we malloc/wait while building the tree */
        err = i915_active_acquire(ref);
        if (err)
                return err;
 
-       active = active_instance(ref, tl);
+       active = active_instance(ref, idx);
        if (!active) {
                err = -ENOMEM;
                goto out;
        }
 
-       if (is_barrier(active)) { /* proto-node used by our idle barrier */
-               /*
-                * This request is on the kernel_context timeline, and so
-                * we can use it to substitute for the pending idle-barrer
-                * request that we want to emit on the kernel_context.
-                */
-               __active_del_barrier(ref, node_from_active(active));
+       /* Reusing a barrier slot: clear it and drop the count it was holding */
+       if (replace_barrier(ref, active)) {
                RCU_INIT_POINTER(active->fence, NULL);
                atomic_dec(&ref->count);
        }
+       /* A slot with no previous fence takes a new reference on ref */
        if (!__i915_active_fence_set(active, fence))
-               atomic_inc(&ref->count);
+               __i915_active_acquire(ref);
 
 out:
        i915_active_release(ref);
        return err;
 }
 
-struct dma_fence *
-i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+/*
+ * Install @fence into the slot @active belonging to @ref.
+ *
+ * If the slot was a barrier proto-node it is taken over directly and NULL
+ * is returned. Otherwise the fence is set under rcu_read_lock(); when the
+ * slot held a previous fence, the result of dma_fence_get_rcu() on it is
+ * returned, and when it held none an extra reference on @ref is taken
+ * and NULL is returned.
+ */
+static struct dma_fence *
+__i915_active_set_fence(struct i915_active *ref,
+                       struct i915_active_fence *active,
+                       struct dma_fence *fence)
 {
        struct dma_fence *prev;
 
-       /* We expect the caller to manage the exclusive timeline ordering */
-       GEM_BUG_ON(i915_active_is_idle(ref));
+       /* NOTE(review): presumably the barrier's count on ref is inherited */
+       if (replace_barrier(ref, active)) {
+               RCU_INIT_POINTER(active->fence, fence);
+               return NULL;
+       }
 
        rcu_read_lock();
-       prev = __i915_active_fence_set(&ref->excl, f);
+       prev = __i915_active_fence_set(active, fence);
        if (prev)
                prev = dma_fence_get_rcu(prev);
        else
-               atomic_inc(&ref->count);
+               __i915_active_acquire(ref);
        rcu_read_unlock();
 
        return prev;
 }
 
+/*
+ * Return the fence slot of @ref for timeline @idx. The slot is required to
+ * exist already; a lookup that still fails after retrying under tree_lock
+ * trips GEM_BUG_ON().
+ */
+static struct i915_active_fence *
+__active_fence(struct i915_active *ref, u64 idx)
+{
+       struct active_node *node;
+
+       /* Fast path: lookup without holding tree_lock */
+       node = __active_lookup(ref, idx);
+       if (likely(node))
+               return &node->base;
+
+       /* Contention with parallel tree builders! Retry under the lock. */
+       spin_lock_irq(&ref->tree_lock);
+       node = __active_lookup(ref, idx);
+       spin_unlock_irq(&ref->tree_lock);
+
+       GEM_BUG_ON(!node); /* slot must be preallocated */
+       return &node->base;
+}
+
+/*
+ * Install @fence into the existing slot of @ref for timeline @idx and
+ * return the result of __i915_active_set_fence() for that slot.
+ */
+struct dma_fence *
+__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
+{
+       struct i915_active_fence *slot;
+
+       /* Only valid while active, see i915_active_acquire_for_context() */
+       slot = __active_fence(ref, idx);
+
+       return __i915_active_set_fence(ref, slot, fence);
+}
+
+/*
+ * Install @f into the exclusive slot (ref->excl) of @ref and return the
+ * result of __i915_active_set_fence() for that slot.
+ */
+struct dma_fence *
+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+{
+       struct i915_active_fence *excl = &ref->excl;
+
+       /* We expect the caller to manage the exclusive timeline ordering */
+       return __i915_active_set_fence(ref, excl, f);
+}
+
 bool i915_active_acquire_if_busy(struct i915_active *ref)
 {
        debug_active_assert(ref);
        return err;
 }
 
+/*
+ * Acquire @ref and make sure a slot for timeline @idx exists.
+ *
+ * Returns 0 with the acquired reference still held, the error from
+ * i915_active_acquire(), or -ENOMEM (after releasing the reference again)
+ * if no slot could be obtained.
+ */
+int i915_active_acquire_for_context(struct i915_active *ref, u64 idx)
+{
+       int err;
+
+       err = i915_active_acquire(ref);
+       if (err)
+               return err;
+
+       if (active_instance(ref, idx))
+               return 0; /* return with active ref */
+
+       i915_active_release(ref);
+       return -ENOMEM;
+}
+
 void i915_active_release(struct i915_active *ref)
 {
        debug_active_assert(ref);
 match:
        rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
        if (p == &ref->cache->node)
-               ref->cache = NULL;
+               WRITE_ONCE(ref->cache, NULL);
        spin_unlock_irq(&ref->tree_lock);
 
        return rb_entry(p, struct active_node, node);
                         */
                        RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
                        node->base.cb.node.prev = (void *)engine;
-                       atomic_inc(&ref->count);
+                       __i915_active_acquire(ref);
                }
                GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));