rcu_read_lock();
        task = pid ? pid_task(pid, PIDTYPE_PID) : NULL;
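+       /* Format: <prefix><global seqno> [<ctx hw_id>:<fence seqno>] @ <ms since emit>: <task> [<pid>] */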
        seq_printf(m, "%s%x [%x:%x] @ %d: %s [%d]\n", prefix,
-                  rq->fence.seqno, rq->ctx->hw_id, rq->fence.seqno,
+                  rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno,
                   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
                   task ? task->comm : "<unknown>",
                   task ? task->pid : -1);
 
        /* Before we do the heavier coherent read of the seqno,
         * check the value (hopefully) in the CPU cacheline.
         */
-       if (i915_gem_request_completed(req))
+       if (__i915_gem_request_completed(req))
                return true;
 
        /* Ensure our read of the seqno is coherent so that we
                        wake_up_process(tsk);
                rcu_read_unlock();
 
-               if (i915_gem_request_completed(req))
+               if (__i915_gem_request_completed(req))
                        return true;
        }
 
 
                return;
 
        DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
-                        engine->name, request->fence.seqno);
+                        engine->name, request->global_seqno);
 
        /* Setup the CS to resume from the breadcrumb of the hung request */
        engine->reset_hw(engine, request);
 
         * of being read by __i915_gem_active_get_rcu(). As such,
         * we have to be very careful when overwriting the contents. During
         * the RCU lookup, we chase the request->engine pointer,
-        * read the request->fence.seqno and increment the reference count.
+        * read the request->global_seqno and increment the reference count.
         *
         * The reference count is incremented atomically. If it is zero,
         * the lookup knows the request is unallocated and complete. Otherwise,
        INIT_LIST_HEAD(&req->active_list);
        req->i915 = dev_priv;
        req->engine = engine;
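+       /* Seqno written by the GPU to the HWS to signal completion of this request */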
+       req->global_seqno = seqno;
        req->ctx = i915_gem_context_get(ctx);
 
        /* No zalloc, must clear what we need by hand */
                return ret < 0 ? ret : 0;
        }
 
+       /* "from" has not been assigned a global seqno, so it cannot be
+        * waited upon via the HW seqno/semaphores; instead gate the
+        * submission of "to" on "from"'s fence signaling completion.
+        */
+       if (!from->global_seqno) {
+               ret = i915_sw_fence_await_dma_fence(&to->submit,
+                                                   &from->fence, 0,
+                                                   GFP_KERNEL);
+               return ret < 0 ? ret : 0;
+       }
+
        idx = intel_engine_sync_index(from->engine, to->engine);
-       if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
+       if (from->global_seqno <= from->engine->semaphore.sync_seqno[idx])
                return 0;
 
        trace_i915_gem_ring_sync_to(to, from);
                        return ret;
        }
 
-       from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
+       from->engine->semaphore.sync_seqno[idx] = from->global_seqno;
        return 0;
 }
 
 
        timeout_us += local_clock_us(&cpu);
        do {
-               if (i915_gem_request_completed(req))
+               if (__i915_gem_request_completed(req))
                        return true;
 
                if (signal_pending_state(state, current))
 
                GEM_BUG_ON(!i915_sw_fence_done(&req->submit));
        }
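+       /* The breadcrumb wait below requires the request to have a global seqno */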
+       GEM_BUG_ON(!req->global_seqno);
 
        /* Optimistic short spin before touching IRQs */
        if (i915_spin_request(req, state, 5))
        if (flags & I915_WAIT_LOCKED)
                add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
-       intel_wait_init(&wait, req->fence.seqno);
+       intel_wait_init(&wait, req->global_seqno);
        if (intel_engine_add_wait(req->engine, &wait))
                /* In order to check that we haven't missed the interrupt
                 * as we enabled it, we need to kick ourselves to do a
 
        struct i915_sw_fence submit;
        wait_queue_t submitq;
 
+       /** Seqno used by the GPU to track execution of this request on the
+        * hardware timeline (the breadcrumb written to the HWS); zero means
+        * no global seqno has been assigned.
+        */
+       u32 global_seqno;
+
        /** GEM sequence number associated with the previous request,
         * when the HWS breadcrumb is equal to this the GPU is processing
         * this request.
 static inline u32
 i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
 {
-       return req ? req->fence.seqno : 0;
+       return req ? req->global_seqno : 0;
 }
 
 static inline struct intel_engine_cs *
 }
 
 static inline bool
-i915_gem_request_started(const struct drm_i915_gem_request *req)
+__i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
+       GEM_BUG_ON(!req->global_seqno);
        return i915_seqno_passed(intel_engine_get_seqno(req->engine),
                                 req->previous_seqno);
 }
 
 static inline bool
-i915_gem_request_completed(const struct drm_i915_gem_request *req)
+i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
+       if (!req->global_seqno)
+               return false;
+
+       return __i915_gem_request_started(req);
+}
+
+static inline bool
+__i915_gem_request_completed(const struct drm_i915_gem_request *req)
+{
+       GEM_BUG_ON(!req->global_seqno);
        return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-                                req->fence.seqno);
+                                req->global_seqno);
+}
+
+static inline bool
+i915_gem_request_completed(const struct drm_i915_gem_request *req)
+{
+       if (!req->global_seqno)
+               return false;
+
+       return __i915_gem_request_completed(req);
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *request,
 static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
                                     int state, unsigned long timeout_us)
 {
-       return (i915_gem_request_started(request) &&
+       return (__i915_gem_request_started(request) &&
                __i915_spin_request(request, state, timeout_us));
 }
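
An illustrative aside, not part of the patch: the plain helpers above can be called on any request and simply report "not started" / "not completed" while global_seqno is still zero, whereas the double-underscore variants skip that check and are only valid once the caller knows a global seqno has been assigned (as the wait path does with its GEM_BUG_ON). A minimal sketch of the intended calling convention follows; example_wait_briefly is a hypothetical caller, and the TASK_UNINTERRUPTIBLE state and 10us spin budget are arbitrary illustrative values.

/* Sketch only: poll a request that may not yet be on the hardware timeline. */
static bool example_wait_briefly(const struct drm_i915_gem_request *rq)
{
        /* Tolerant form: returns false while rq->global_seqno == 0 */
        if (i915_gem_request_completed(rq))
                return true;

        /* Nothing on the hardware timeline to spin against yet */
        if (!rq->global_seqno)
                return false;

        /* The raw __-prefixed checks used inside i915_spin_request() are now
         * safe, since a global seqno is known to exist.
         */
        return i915_spin_request(rq, TASK_UNINTERRUPTIBLE, 10);
}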
 
 
                           struct drm_i915_error_request *erq)
 {
        erq->context = request->ctx->hw_id;
-       erq->seqno = request->fence.seqno;
+       erq->seqno = request->global_seqno;
        erq->jiffies = request->emitted_jiffies;
        erq->head = request->head;
        erq->tail = request->tail;
 
        wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
 
        wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
-       wqi->fence_id = rq->fence.seqno;
+       wqi->fence_id = rq->global_seqno;
 
        kunmap_atomic(base);
 }
                client->b_fail += 1;
 
        guc->submissions[engine_id] += 1;
-       guc->last_seqno[engine_id] = rq->fence.seqno;
+       guc->last_seqno[engine_id] = rq->global_seqno;
        spin_unlock(&client->wq_lock);
 }
 
 
                           __entry->dev = from->i915->drm.primary->index;
                           __entry->sync_from = from->engine->id;
                           __entry->sync_to = to->engine->id;
-                          __entry->seqno = from->fence.seqno;
+                          __entry->seqno = from->global_seqno;
                           ),
 
            TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
            TP_fast_assign(
                           __entry->dev = req->i915->drm.primary->index;
                           __entry->ring = req->engine->id;
-                          __entry->seqno = req->fence.seqno;
+                          __entry->seqno = req->global_seqno;
                           __entry->flags = flags;
                           dma_fence_enable_sw_signaling(&req->fence);
                           ),
            TP_fast_assign(
                           __entry->dev = req->i915->drm.primary->index;
                           __entry->ring = req->engine->id;
-                          __entry->seqno = req->fence.seqno;
+                          __entry->seqno = req->global_seqno;
                           ),
 
            TP_printk("dev=%u, ring=%u, seqno=%u",
            TP_fast_assign(
                           __entry->dev = req->i915->drm.primary->index;
                           __entry->ring = req->engine->id;
-                          __entry->seqno = req->fence.seqno;
+                          __entry->seqno = req->global_seqno;
                           __entry->blocking =
                                     mutex_is_locked(&req->i915->drm.struct_mutex);
                           ),
 
 
        /* locked by dma_fence_enable_sw_signaling() */
        assert_spin_locked(&request->lock);
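+       /* No global seqno, so there is no breadcrumb for the signaler to wait on */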
+       if (!request->global_seqno)
+               return;
 
        request->signaling.wait.tsk = b->signaler;
-       request->signaling.wait.seqno = request->fence.seqno;
+       request->signaling.wait.seqno = request->global_seqno;
        i915_gem_request_get(request);
 
        spin_lock(&b->lock);
        p = &b->signals.rb_node;
        while (*p) {
                parent = *p;
-               if (i915_seqno_passed(request->fence.seqno,
-                                     to_signaler(parent)->fence.seqno)) {
+               if (i915_seqno_passed(request->global_seqno,
+                                     to_signaler(parent)->global_seqno)) {
                        p = &parent->rb_right;
                        first = false;
                } else {
 
                        intel_hws_seqno_address(request->engine) |
                        MI_FLUSH_DW_USE_GTT);
        intel_ring_emit(ring, 0);
-       intel_ring_emit(ring, request->fence.seqno);
+       intel_ring_emit(ring, request->global_seqno);
        intel_ring_emit(ring, MI_USER_INTERRUPT);
        intel_ring_emit(ring, MI_NOOP);
        return intel_logical_ring_advance(request);
                         PIPE_CONTROL_QW_WRITE));
        intel_ring_emit(ring, intel_hws_seqno_address(request->engine));
        intel_ring_emit(ring, 0);
-       intel_ring_emit(ring, i915_gem_request_get_seqno(request));
+       intel_ring_emit(ring, request->global_seqno);
        /* We're thrashing one dword of HWS. */
        intel_ring_emit(ring, 0);
        intel_ring_emit(ring, MI_USER_INTERRUPT);
 
                                PIPE_CONTROL_CS_STALL);
                intel_ring_emit(ring, lower_32_bits(gtt_offset));
                intel_ring_emit(ring, upper_32_bits(gtt_offset));
-               intel_ring_emit(ring, req->fence.seqno);
+               intel_ring_emit(ring, req->global_seqno);
                intel_ring_emit(ring, 0);
                intel_ring_emit(ring,
                                MI_SEMAPHORE_SIGNAL |
                                lower_32_bits(gtt_offset) |
                                MI_FLUSH_DW_USE_GTT);
                intel_ring_emit(ring, upper_32_bits(gtt_offset));
-               intel_ring_emit(ring, req->fence.seqno);
+               intel_ring_emit(ring, req->global_seqno);
                intel_ring_emit(ring,
                                MI_SEMAPHORE_SIGNAL |
                                MI_SEMAPHORE_TARGET(waiter->hw_id));
                if (i915_mmio_reg_valid(mbox_reg)) {
                        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
                        intel_ring_emit_reg(ring, mbox_reg);
-                       intel_ring_emit(ring, req->fence.seqno);
+                       intel_ring_emit(ring, req->global_seqno);
                }
        }
 
 
        intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
        intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-       intel_ring_emit(ring, req->fence.seqno);
+       intel_ring_emit(ring, req->global_seqno);
        intel_ring_emit(ring, MI_USER_INTERRUPT);
        intel_ring_advance(ring);
 
                               PIPE_CONTROL_QW_WRITE));
        intel_ring_emit(ring, intel_hws_seqno_address(engine));
        intel_ring_emit(ring, 0);
-       intel_ring_emit(ring, i915_gem_request_get_seqno(req));
+       intel_ring_emit(ring, req->global_seqno);
        /* We're thrashing one dword of HWS. */
        intel_ring_emit(ring, 0);
        intel_ring_emit(ring, MI_USER_INTERRUPT);
                        MI_SEMAPHORE_WAIT |
                        MI_SEMAPHORE_GLOBAL_GTT |
                        MI_SEMAPHORE_SAD_GTE_SDD);
-       intel_ring_emit(ring, signal->fence.seqno);
+       intel_ring_emit(ring, signal->global_seqno);
        intel_ring_emit(ring, lower_32_bits(offset));
        intel_ring_emit(ring, upper_32_bits(offset));
        intel_ring_advance(ring);
         * seqno is >= the last seqno executed. However for hardware the
         * comparison is strictly greater than.
         */
-       intel_ring_emit(ring, signal->fence.seqno - 1);
+       intel_ring_emit(ring, signal->global_seqno - 1);
        intel_ring_emit(ring, 0);
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_advance(ring);