Code it similarly to how we did it for the gfx and compute engines.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
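
For reference, the change has the same shape on all three SDMA generations:
the fence poll that waits for previously submitted work moves out of the
emit_vm_flush() hook into a new emit_pipeline_sync() hook, which is then
wired into the ring function table. The standalone mock below is
illustrative only; the mock_* types and the printf stand-ins are not kernel
code, they just sketch how the two hooks relate after the split:

#include <stdint.h>
#include <stdio.h>

struct mock_ring;

/* Mirrors the two amdgpu_ring_funcs hooks this patch touches. */
struct mock_ring_funcs {
	void (*emit_pipeline_sync)(struct mock_ring *ring);
	void (*emit_vm_flush)(struct mock_ring *ring,
			      unsigned vm_id, uint64_t pd_addr);
};

struct mock_ring {
	const struct mock_ring_funcs *funcs;
	uint32_t sync_seq;	/* stand-in for ring->fence_drv.sync_seq */
	uint64_t gpu_addr;	/* stand-in for ring->fence_drv.gpu_addr */
};

/* Wait for all previously submitted work: in the real code this is the
 * POLL_REG_MEM packet polling the fence location until it reaches
 * sync_seq. */
static void mock_emit_pipeline_sync(struct mock_ring *ring)
{
	printf("poll fence at 0x%llx until it reaches %u\n",
	       (unsigned long long)ring->gpu_addr,
	       (unsigned)ring->sync_seq);
}

/* Update the page table base and flush the TLB: in the real code this
 * is the SRBM_WRITE packets, now without any preceding poll. */
static void mock_emit_vm_flush(struct mock_ring *ring,
			       unsigned vm_id, uint64_t pd_addr)
{
	(void)ring;
	printf("set VM%u page directory to 0x%llx and flush the TLB\n",
	       vm_id, (unsigned long long)pd_addr);
}

static const struct mock_ring_funcs mock_funcs = {
	.emit_pipeline_sync = mock_emit_pipeline_sync,
	.emit_vm_flush = mock_emit_vm_flush,
};

int main(void)
{
	struct mock_ring ring = {
		.funcs = &mock_funcs,
		.sync_seq = 42,
		.gpu_addr = 0x100000,
	};

	/* After the split, a synced VM flush is two separate emits. */
	ring.funcs->emit_pipeline_sync(&ring);
	ring.funcs->emit_vm_flush(&ring, 1, 0x200000);
	return 0;
}

Keeping the sync in its own hook means a caller that already knows the ring
has caught up can emit the flush alone instead of always paying for the poll.
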
 }
 
 /**
- * cik_sdma_ring_emit_vm_flush - cik vm flush using sDMA
+ * cik_sdma_ring_emit_pipeline_sync - sync the pipeline
  *
  * @ring: amdgpu_ring pointer
- * @vm: amdgpu_vm pointer
  *
- * Update the page table base and flush the VM TLB
- * using sDMA (CIK).
+ * Make sure all previous operations are completed (CIK).
  */
-static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
-                                       unsigned vm_id, uint64_t pd_addr)
+static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 {
-       u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
-                         SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;
 
        amdgpu_ring_write(ring, seq); /* reference */
        amdgpu_ring_write(ring, 0xfffffff); /* mask */
        amdgpu_ring_write(ring, (0xfff << 16) | 4); /* retry count, poll interval */
+}
 
+/**
+ * cik_sdma_ring_emit_vm_flush - cik vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vm_id: VMID of the VM to flush
+ * @pd_addr: address of the page directory
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA (CIK).
+ */
+static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
+                                       unsigned vm_id, uint64_t pd_addr)
+{
+       u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
+                         SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
 
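+       /* update the page table base address */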
        amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
        if (vm_id < 8) {
        .parse_cs = NULL,
        .emit_ib = cik_sdma_ring_emit_ib,
        .emit_fence = cik_sdma_ring_emit_fence,
+       .emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync,
        .emit_vm_flush = cik_sdma_ring_emit_vm_flush,
        .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
        .emit_hdp_invalidate = cik_sdma_ring_emit_hdp_invalidate,
 
 }
 
 /**
- * sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA
+ * sdma_v2_4_ring_emit_pipeline_sync - sync the pipeline
  *
  * @ring: amdgpu_ring pointer
- * @vm: amdgpu_vm pointer
  *
- * Update the page table base and flush the VM TLB
- * using sDMA (VI).
+ * Make sure all previous operations are completed (VI).
  */
-static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
-                                        unsigned vm_id, uint64_t pd_addr)
+static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 {
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;
        amdgpu_ring_write(ring, 0xfffffff); /* mask */
        amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
                          SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
 
+/**
+ * sdma_v2_4_ring_emit_vm_flush - vi vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vm_id: VMID of the VM to flush
+ * @pd_addr: address of the page directory
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA (VI).
+ */
+static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
+                                        unsigned vm_id, uint64_t pd_addr)
+{
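+       /* update the page table base address */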
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
                          SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
        if (vm_id < 8) {
        .parse_cs = NULL,
        .emit_ib = sdma_v2_4_ring_emit_ib,
        .emit_fence = sdma_v2_4_ring_emit_fence,
+       .emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
        .emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
        .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
        .emit_hdp_invalidate = sdma_v2_4_ring_emit_hdp_invalidate,
 
 }
 
 /**
- * sdma_v3_0_ring_emit_vm_flush - cik vm flush using sDMA
+ * sdma_v3_0_ring_emit_pipeline_sync - sync the pipeline
  *
  * @ring: amdgpu_ring pointer
- * @vm: amdgpu_vm pointer
  *
- * Update the page table base and flush the VM TLB
- * using sDMA (VI).
+ * Make sure all previous operations are completed (VI).
  */
-static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
-                                        unsigned vm_id, uint64_t pd_addr)
+static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 {
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;
        amdgpu_ring_write(ring, 0xfffffff); /* mask */
        amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
                          SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
 
+/**
+ * sdma_v3_0_ring_emit_vm_flush - vi vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vm_id: VMID of the VM to flush
+ * @pd_addr: address of the page directory
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA (VI).
+ */
+static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+                                        unsigned vm_id, uint64_t pd_addr)
+{
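+       /* update the page table base address */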
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
                          SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
        if (vm_id < 8) {
        .parse_cs = NULL,
        .emit_ib = sdma_v3_0_ring_emit_ib,
        .emit_fence = sdma_v3_0_ring_emit_fence,
+       .emit_pipeline_sync = sdma_v3_0_ring_emit_pipeline_sync,
        .emit_vm_flush = sdma_v3_0_ring_emit_vm_flush,
        .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = sdma_v3_0_ring_emit_hdp_invalidate,