]> www.infradead.org Git - users/willy/xarray.git/commitdiff
drm/amdgpu/gfx10: Refine Cleaner Shader for GFX10.1.10
authorVitaly Prosyak <vitaly.prosyak@amd.com>
Tue, 6 May 2025 20:45:33 +0000 (16:45 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 3 Jun 2025 19:03:09 +0000 (15:03 -0400)
This patch updates the cleaner shader, which is responsible for
initializing GPU resources such as Local Data Share (LDS), Vector
General Purpose Registers (VGPRs), and Scalar General Purpose Registers
(SGPRs). Changes include adjustments to register clearing and shader
configuration.

- Updated GPU resource initialization addresses in the cleaner shader
  from `be803080` to `be803000`.
- Simplified the logic in the SGPR clearing section, ensuring all SGPRs
  are set to zero.

Fixes: 25961bad9212 ("drm/amdgpu/gfx10: Add cleaner shader for GFX10.1.10")
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Manu Rastogi <manu.rastogi@amd.com>
Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm

index 5255378af53c0a2d8941aea9226996542c356a24..f67569ccf9f60940b7327e20105501a7c37251de 100644 (file)
@@ -43,9 +43,9 @@ static const u32 gfx_10_1_10_cleaner_shader_hex[] = {
        0xd70f6a01, 0x000202ff,
        0x00000400, 0x80828102,
        0xbf84fff7, 0xbefc03ff,
-       0x00000068, 0xbe803080,
-       0xbe813080, 0xbe823080,
-       0xbe833080, 0x80fc847c,
+       0x00000068, 0xbe803000,
+       0xbe813000, 0xbe823000,
+       0xbe833000, 0x80fc847c,
        0xbf84fffa, 0xbeea0480,
        0xbeec0480, 0xbeee0480,
        0xbef00480, 0xbef20480,
index 9ba3359253c95d15802f0d9de6899827efc79813..54f7ed9e2801c5d6b9ab6e4935b05823267bfbac 100644 (file)
@@ -40,7 +40,6 @@ shader main
   type(CS)
   wave_size(32)
 // Note: original source code from SQ team
-
 //
 // Create 32 waves in a threadgroup (CS waves)
 // Each allocates 64 VGPRs
@@ -71,8 +70,8 @@ label_0005:
   s_sub_u32     s2, s2, 8
   s_cbranch_scc0  label_0005
   //
-  s_mov_b32     s2, 0x80000000                     // Bit31 is first_wave
-  s_and_b32     s2, s2, s0                                  // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+  s_mov_b32     s2, 0x80000000                       // Bit31 is first_wave
+  s_and_b32     s2, s2, s1                           // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
   s_cbranch_scc0  label_0023                         // Clean LDS if its first wave of ThreadGroup/WorkGroup
   // CLEAR LDS
   //
@@ -99,10 +98,10 @@ label_001F:
 label_0023:
   s_mov_b32     m0, 0x00000068  // Loop 108/4=27 times  (loop unrolled for performance)
 label_sgpr_loop:
-  s_movreld_b32     s0, 0
-  s_movreld_b32     s1, 0
-  s_movreld_b32     s2, 0
-  s_movreld_b32     s3, 0
+  s_movreld_b32     s0, s0
+  s_movreld_b32     s1, s0
+  s_movreld_b32     s2, s0
+  s_movreld_b32     s3, s0
   s_sub_u32         m0, m0, 4
   s_cbranch_scc0  label_sgpr_loop