Compute IBs need a cache flush/invalidate before execution too, so add an
emit_mem_sync ring callback for the compute rings (ACQUIRE_MEM on gfx7+,
SURFACE_SYNC on gfx6) and account for it in emit_frame_size.
v2: split out version bump
v3: squash in emit frame count fixes
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
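
The new hook is invoked from the IB scheduling path. The caller side is not
part of this excerpt; roughly, in amdgpu_ib_schedule() before the IBs are
emitted (the exact placement is an assumption here):

	/* Flush/invalidate shader caches before the IBs run, if the
	 * ring provides a mem_sync implementation. */
	if (job && ring->funcs->emit_mem_sync)
		ring->funcs->emit_mem_sync(ring);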
 #              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
 #              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
 #              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
-#define        PACKET3_AQUIRE_MEM                              0x58
+#define        PACKET3_ACQUIRE_MEM                             0x58
 #define        PACKET3_REWIND                                  0x59
 #define        PACKET3_LOAD_UCONFIG_REG                        0x5E
 #define        PACKET3_LOAD_SH_REG                             0x5F
 
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
                2 + /* gfx_v10_0_ring_emit_vm_flush */
-               8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
+               8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
+               8, /* gfx_v10_0_emit_mem_sync */
        .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
        .emit_ib = gfx_v10_0_ring_emit_ib_compute,
        .emit_fence = gfx_v10_0_ring_emit_fence,
        .emit_wreg = gfx_v10_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+       .emit_mem_sync = gfx_v10_0_emit_mem_sync,
 };
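
The gfx10 callback referenced above is also outside this excerpt. A sketch
matching the 8-dword budget, assuming the PACKET3_ACQUIRE_MEM_GCR_CNTL_*
helpers from nvd.h (on gfx10 the flush/invalidate controls move out of
CP_COHER_CNTL into a trailing GCR_CNTL dword, hence 8 dwords instead of 7):

static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	const unsigned int gcr_cntl =
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);

	/* ACQUIRE_MEM over the full address range: write back and
	 * invalidate all GPU caches via GCR_CNTL. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
	amdgpu_ring_write(ring, 0);		/* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff);	/* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);	/* POLL_INTERVAL */
	amdgpu_ring_write(ring, gcr_cntl);	/* GCR_CNTL */
}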
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
 
                5 + 5 + /* hdp flush / invalidate */
                7 + /* gfx_v6_0_ring_emit_pipeline_sync */
                SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */
-               14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
+               14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
+               5, /* SURFACE_SYNC */
        .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
        .emit_ib = gfx_v6_0_ring_emit_ib,
        .emit_fence = gfx_v6_0_ring_emit_fence,
        .test_ib = gfx_v6_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .emit_wreg = gfx_v6_0_ring_emit_wreg,
+       .emit_mem_sync = gfx_v6_0_emit_mem_sync,
 };
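
gfx_v6_0_emit_mem_sync is likewise elided here. A sketch matching the
5-dword SURFACE_SYNC budget above: SI predates ACQUIRE_MEM, so the older
SURFACE_SYNC packet is used, with the *_ACTION_ENA bits from sid.h:

static void gfx_v6_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);  /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0x0000000A);	/* POLL_INTERVAL */
}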
 
 static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev)
 
        amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
 }
 
+static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
+{
+       amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+       amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+                         PACKET3_TC_ACTION_ENA |
+                         PACKET3_SH_KCACHE_ACTION_ENA |
+                         PACKET3_SH_ICACHE_ACTION_ENA);  /* CP_COHER_CNTL */
+       amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
+       amdgpu_ring_write(ring, 0xff);          /* CP_COHER_SIZE_HI */
+       amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
+       amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
+       amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
+}
+
 static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
        .name = "gfx_v7_0",
        .early_init = gfx_v7_0_early_init,
                5 + /* hdp invalidate */
                7 + /* gfx_v7_0_ring_emit_pipeline_sync */
                CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
-               7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
+               7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
+               7, /* gfx_v7_0_emit_mem_sync_compute */
        .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
        .emit_ib = gfx_v7_0_ring_emit_ib_compute,
        .emit_fence = gfx_v7_0_ring_emit_fence_compute,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_wreg = gfx_v7_0_ring_emit_wreg,
+       .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
 };
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
 
        amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
 }
 
+static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
+{
+       amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+       amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+                         PACKET3_TC_ACTION_ENA |
+                         PACKET3_SH_KCACHE_ACTION_ENA |
+                         PACKET3_SH_ICACHE_ACTION_ENA |
+                         PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
+       amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
+       amdgpu_ring_write(ring, 0xff);          /* CP_COHER_SIZE_HI */
+       amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
+       amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
+       amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
+}
+
 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
                5 + /* hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
-               7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
+               7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
+               7, /* gfx_v8_0_emit_mem_sync_compute */
        .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
+       .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
 
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
                2 + /* gfx_v9_0_ring_emit_vm_flush */
-               8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
+               8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
+               7, /* gfx_v9_0_emit_mem_sync */
        .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
        .emit_ib = gfx_v9_0_ring_emit_ib_compute,
        .emit_fence = gfx_v9_0_ring_emit_fence,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+       .emit_mem_sync = gfx_v9_0_emit_mem_sync,
 };
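
The gfx9 callback is not shown in this excerpt either. A sketch mirroring
the gfx8 version above, assuming parametrized
PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_* helpers in soc15d.h (7 dwords, matching
the frame-size accounting):

static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	const unsigned int cp_coher_cntl =
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);

	/* ACQUIRE_MEM over the full address range: write back and
	 * invalidate TC/TCL1 and the shader I$/K$. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, cp_coher_cntl);	/* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff);	/* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);	/* POLL_INTERVAL */
}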
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 
 #              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
 #              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
 #              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
-#define        PACKET3_AQUIRE_MEM                              0x58
+#define        PACKET3_ACQUIRE_MEM                             0x58
 #define        PACKET3_REWIND                                  0x59
 #define        PACKET3_LOAD_UCONFIG_REG                        0x5E
 #define        PACKET3_LOAD_SH_REG                             0x5F