return r;
 
        ++(num_ibs[r]);
+       p->gang_leader_idx = r;
        return 0;
 }
 
                if (ret)
                        goto free_all_kdata;
        }
-       p->gang_leader = p->jobs[p->gang_size - 1];
+       p->gang_leader = p->jobs[p->gang_leader_idx];
 
        if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
                ret = -ECANCELED;
                        return r;
        }
 
-       for (i = 0; i < p->gang_size - 1; ++i) {
+       for (i = 0; i < p->gang_size; ++i) {
+               if (p->jobs[i] == leader)
+                       continue;
+
                r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
                if (r)
                        return r;
        }
 
-       r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
+       r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
        if (r && r != -ERESTARTSYS)
                DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
-
        return r;
 }
 
        for (i = 0; i < p->gang_size; ++i)
                drm_sched_job_arm(&p->jobs[i]->base);
 
-       for (i = 0; i < (p->gang_size - 1); ++i) {
+       for (i = 0; i < p->gang_size; ++i) {
                struct dma_fence *fence;
 
+               if (p->jobs[i] == leader)
+                       continue;
+
                fence = &p->jobs[i]->base.s_fence->scheduled;
                r = amdgpu_sync_fence(&leader->sync, fence);
                if (r)
        list_for_each_entry(e, &p->validated, tv.head) {
 
                /* Everybody except for the gang leader uses READ */
-               for (i = 0; i < (p->gang_size - 1); ++i) {
+               for (i = 0; i < p->gang_size; ++i) {
+                       if (p->jobs[i] == leader)
+                               continue;
+
                        dma_resv_add_fence(e->tv.bo->base.resv,
                                           &p->jobs[i]->base.s_fence->finished,
                                           DMA_RESV_USAGE_READ);
                e->tv.num_shared = 0;
        }
 
-       seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
+       seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
                                   p->fence);
        amdgpu_cs_post_dependencies(p);