Use amdgpu_ring_emit_reg_write_reg_wait.  On engines that support it,
it provides a write and wait in a single packet which avoids a missed
ack if a world switch happens between the request and waiting for the
ack.
Reviewed-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
        amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
                              upper_32_bits(pd_addr));
 
-       amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
-       /* wait for the invalidate to complete */
-       amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-                                 1 << vmid, 1 << vmid);
+       amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+                                           hub->vm_inv_eng0_ack + eng,
+                                           req, 1 << vmid);
 
        return pd_addr;
 }