From 230a4b0528c0f2439df3f6cc5a6f053089a116b0 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:54 -0400 Subject: [PATCH 01/16] drm/amdgpu: make GFX6 easier to read Just fix the style and add a comment for reading easiness Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index fedf88595597..9c6453b458b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1735,10 +1735,14 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev) gfx_v6_0_get_cu_info(adev); gfx_v6_0_config_init(adev); - WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | - (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); - WREG32(mmCP_MEQ_THRESHOLDS, (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | - (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); + WREG32(mmCP_QUEUE_THRESHOLDS, + ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | + (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); + + /* set HW defaults for 3D engine */ + WREG32(mmCP_MEQ_THRESHOLDS, + (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | + (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); sx_debug_1 = RREG32(mmSX_DEBUG_1); WREG32(mmSX_DEBUG_1, sx_debug_1); -- 2.51.0 From 14f15aa054419ecd3a2578822958f87a84142bee Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:56 -0400 Subject: [PATCH 02/16] drm/amdgpu: move si_dma.c away from sid.h and si_enums.h Replace defines for the ones in oss_1_0_d.h and oss_1_0_sh_mask.h Taking the opportunity to add some comments taken from cik_sdma.c Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si.c | 4 +- drivers/gpu/drm/amd/amdgpu/si_dma.c | 108 ++++++++++++++++------------ drivers/gpu/drm/amd/amdgpu/sid.h | 36 ---------- 3 files changed, 63 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 448f246f4537..94a6ec162640 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1130,8 +1130,8 @@ static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {mmGRBM_STATUS_SE1}, {mmSRBM_STATUS}, {mmSRBM_STATUS2}, - {DMA_STATUS_REG + DMA0_REGISTER_OFFSET}, - {DMA_STATUS_REG + DMA1_REGISTER_OFFSET}, + {mmDMA_STATUS_REG + DMA0_REGISTER_OFFSET}, + {mmDMA_STATUS_REG + DMA1_REGISTER_OFFSET}, {mmCP_STAT}, {mmCP_STALLED_STAT1}, {mmCP_STALLED_STAT2}, diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 552efcc7764f..bfdaaca434ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -40,17 +40,31 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); static void si_dma_set_irq_funcs(struct amdgpu_device *adev); +/** + * si_dma_ring_get_rptr - get the current read pointer + * + * @ring: amdgpu ring pointer + * + * Get the current rptr from the hardware (SI). + */ static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) { return *ring->rptr_cpu_addr; } +/** + * si_dma_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware (SI). + */ static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; + return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; } static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) @@ -58,7 +72,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); } static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, @@ -119,9 +133,9 @@ static void si_dma_stop(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { /* dma0 */ - rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]); - rb_cntl &= ~DMA_RB_ENABLE; - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); + rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]); + rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK; + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); } } @@ -142,37 +156,37 @@ static int si_dma_start(struct amdgpu_device *adev) rb_bufsz = order_base_2(ring->ring_size / 4); rb_cntl = rb_bufsz << 1; #ifdef __BIG_ENDIAN - rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; + rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK | DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK; #endif - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); /* Initialize the ring buffer's read and write pointers */ - WREG32(DMA_RB_RPTR + sdma_offsets[i], 0); - WREG32(DMA_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0); rptr_addr = ring->rptr_gpu_addr; - WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); - WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); + WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); + WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); - rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; + rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK; - WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); + WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); /* enable DMA IBs */ - ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; + ib_cntl = DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK; #ifdef __BIG_ENDIAN - ib_cntl |= DMA_IB_SWAP_ENABLE; + ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK; #endif - WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl); + WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]); - dma_cntl &= ~CTXEMPTY_INT_ENABLE; - WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); + dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]); + dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK; + WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl); ring->wptr = 0; - WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK); r = amdgpu_ring_test_helper(ring); if (r) @@ -547,9 +561,9 @@ static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - u32 tmp = RREG32(SRBM_STATUS2); + u32 tmp = RREG32(mmSRBM_STATUS2); - if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK)) + if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK)) return false; return true; @@ -585,14 +599,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); - sdma_cntl &= ~TRAP_ENABLE; - WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); + sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); - sdma_cntl |= TRAP_ENABLE; - WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); + sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); break; default: break; @@ -601,14 +615,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); - sdma_cntl &= ~TRAP_ENABLE; - WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); + sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); - sdma_cntl |= TRAP_ENABLE; - WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); + sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); break; default: break; @@ -647,11 +661,11 @@ static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, offset = DMA0_REGISTER_OFFSET; else offset = DMA1_REGISTER_OFFSET; - orig = data = RREG32(DMA_POWER_CNTL + offset); - data &= ~MEM_POWER_OVERRIDE; + orig = data = RREG32(mmDMA_POWER_CNTL + offset); + data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (data != orig) - WREG32(DMA_POWER_CNTL + offset, data); - WREG32(DMA_CLK_CTRL + offset, 0x00000100); + WREG32(mmDMA_POWER_CNTL + offset, data); + WREG32(mmDMA_CLK_CTRL + offset, 0x00000100); } } else { for (i = 0; i < adev->sdma.num_instances; i++) { @@ -659,15 +673,15 @@ static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, offset = DMA0_REGISTER_OFFSET; else offset = DMA1_REGISTER_OFFSET; - orig = data = RREG32(DMA_POWER_CNTL + offset); - data |= MEM_POWER_OVERRIDE; + orig = data = RREG32(mmDMA_POWER_CNTL + offset); + data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (data != orig) - WREG32(DMA_POWER_CNTL + offset, data); + WREG32(mmDMA_POWER_CNTL + offset, data); - orig = data = RREG32(DMA_CLK_CTRL + offset); + orig = data = RREG32(mmDMA_CLK_CTRL + offset); data = 0xff000000; if (data != orig) - WREG32(DMA_CLK_CTRL + offset, data); + WREG32(mmDMA_CLK_CTRL + offset, data); } } @@ -681,11 +695,11 @@ static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block, struct amdgpu_device *adev = ip_block->adev; - WREG32(DMA_PGFSM_WRITE, 0x00002000); - WREG32(DMA_PGFSM_CONFIG, 0x100010ff); + WREG32(mmDMA_PGFSM_WRITE, 0x00002000); + WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff); for (tmp = 0; tmp < 5; tmp++) - WREG32(DMA_PGFSM_WRITE, 0); + WREG32(mmDMA_PGFSM_WRITE, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 86b6e830cd71..085b2b1cf120 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -1559,44 +1559,11 @@ #define DMA0_REGISTER_OFFSET 0x0 /* not a register */ #define DMA1_REGISTER_OFFSET 0x200 /* not a register */ -#define DMA_RB_CNTL 0x3400 -# define DMA_RB_ENABLE (1 << 0) -# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */ -# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */ -# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12) -# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */ -# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ -#define DMA_RB_BASE 0x3401 -#define DMA_RB_RPTR 0x3402 -#define DMA_RB_WPTR 0x3403 - -#define DMA_RB_RPTR_ADDR_HI 0x3407 -#define DMA_RB_RPTR_ADDR_LO 0x3408 - -#define DMA_IB_CNTL 0x3409 -# define DMA_IB_ENABLE (1 << 0) -# define DMA_IB_SWAP_ENABLE (1 << 4) -# define CMD_VMID_FORCE (1 << 31) #define DMA_IB_RPTR 0x340a -#define DMA_CNTL 0x340b -# define TRAP_ENABLE (1 << 0) -# define SEM_INCOMPLETE_INT_ENABLE (1 << 1) -# define SEM_WAIT_INT_ENABLE (1 << 2) -# define DATA_SWAP_ENABLE (1 << 3) -# define FENCE_SWAP_ENABLE (1 << 4) -# define CTXEMPTY_INT_ENABLE (1 << 28) -#define DMA_STATUS_REG 0x340d -# define DMA_IDLE (1 << 0) #define DMA_TILING_CONFIG 0x342e -#define DMA_POWER_CNTL 0x342f -# define MEM_POWER_OVERRIDE (1 << 8) -#define DMA_CLK_CTRL 0x3430 - #define DMA_PG 0x3435 # define PG_CNTL_ENABLE (1 << 0) -#define DMA_PGFSM_CONFIG 0x3436 -#define DMA_PGFSM_WRITE 0x3437 #define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \ (((b) & 0x1) << 26) | \ @@ -2035,10 +2002,7 @@ #define DMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411 #define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412 #define DMA_MODE 0x342f -#define DMA_RB_RPTR_ADDR_HI 0x3407 -#define DMA_RB_RPTR_ADDR_LO 0x3408 #define DMA_BUSY_MASK 0x20 -#define DMA1_BUSY_MASK 0X40 #define SDMA_MAX_INSTANCE 2 #define PCIE_BUS_CLK 10000 -- 2.51.0 From d35a412910906ffdcb46a33b20ccd4676b62fccd Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:58 -0400 Subject: [PATCH 03/16] drm/amdgpu: keep removing sid.h dependency from si_dma.c Move and rename DMA_SEM_INCOMPLETE_TIMER_CNTL and DMA_SEM_WAIT_FAIL_TIMER_CNTL in oss_1_0_d.h Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si_dma.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/sid.h | 2 -- drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h | 2 ++ 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index bfdaaca434ab..7f18e4875287 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -149,8 +149,8 @@ static int si_dma_start(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); - WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); + WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); + WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); /* Set ring buffer size in dwords */ rb_bufsz = order_base_2(ring->ring_size / 4); @@ -477,7 +477,7 @@ static int si_dma_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->sdma.num_instances = 2; + adev->sdma.num_instances = SDMA_MAX_INSTANCE; si_dma_set_ring_funcs(adev); si_dma_set_buffer_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 085b2b1cf120..0ace1ede24a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -1999,8 +1999,6 @@ #define AMDGPU_PCIE_INDEX 0xc #define AMDGPU_PCIE_DATA 0xd -#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411 -#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412 #define DMA_MODE 0x342f #define DMA_BUSY_MASK 0x20 #define SDMA_MAX_INSTANCE 2 diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h index cef026359691..4dd386b98748 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h @@ -246,6 +246,8 @@ #define mmDMA_CNTL 0x340b #define mmDMA_STATUS_REG 0x340D #define mmDMA_TILING_CONFIG 0x342E +#define mmDMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411 +#define mmDMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412 #define mmDMA_POWER_CNTL 0x342F #define mmDMA_CLK_CTRL 0x3430 #define mmDMA_PG 0x3435 -- 2.51.0 From b71b7cd91c642f282794b82cf2b8159899b9aa59 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:46:59 -0400 Subject: [PATCH 04/16] drm/amdgpu: cleanup DCE6 a bit more Use shifts already available in DCE6's defines, masks and shifts. Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sid.h | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index cbedf91b68bf..2d85ea7159de 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1998,8 +1998,8 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, case DRM_FORMAT_ABGR8888: fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); - fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) | - GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R)); + fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) | + (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT)); #ifdef __BIG_ENDIAN fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 0ace1ede24a7..fc61c2ea002f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -1902,25 +1902,21 @@ #define ES_AND_GS_AUTO 3 #define BUF_SWAP_32BIT (2 << 16) -#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) #define GRPH_RED_SEL_R 0 #define GRPH_RED_SEL_G 1 #define GRPH_RED_SEL_B 2 #define GRPH_RED_SEL_A 3 -#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) #define GRPH_GREEN_SEL_G 0 #define GRPH_GREEN_SEL_B 1 #define GRPH_GREEN_SEL_A 2 #define GRPH_GREEN_SEL_R 3 -#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) #define GRPH_BLUE_SEL_B 0 #define GRPH_BLUE_SEL_A 1 #define GRPH_BLUE_SEL_R 2 #define GRPH_BLUE_SEL_G 3 -#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) #define GRPH_ALPHA_SEL_A 0 #define GRPH_ALPHA_SEL_R 1 #define GRPH_ALPHA_SEL_G 2 -- 2.51.0 From 48b733d99b0d8db01fcf55d7ae31f69510fb1a4a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 27 Feb 2025 12:31:28 -0500 Subject: [PATCH 05/16] drm/amdgpu: add rebar parameter Add a new parameter to disable BAR resizing. Note that this only disables the driver from attempting to resize the BAR, The BIOS may have resized the BAR at boot. Some teams have found this useful in debugging P2P DMA issues on systems where the available MMIO space did not allow for all of the GPUs present to resize their BARs. Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++++++++++ 3 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9a9e5249c63f..8316f93c1cce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -266,6 +266,7 @@ extern int amdgpu_umsch_mm_fwlog; extern int amdgpu_user_partt_mode; extern int amdgpu_agp; +extern int amdgpu_rebar; extern int amdgpu_wbrf; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7525128f971f..f4a01704effc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1680,6 +1680,9 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + if (!amdgpu_rebar) + return 0; + /* resizing on Dell G5 SE platforms causes problems with runtime pm */ if ((amdgpu_runtime_pm != 0) && adev->pdev->vendor == PCI_VENDOR_ID_ATI && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 26bf896f1444..2ca04d6aaa82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -238,6 +238,7 @@ int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; int amdgpu_damage_clips = -1; /* auto */ int amdgpu_umsch_mm_fwlog; +int amdgpu_rebar = -1; /* auto */ DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", @@ -1096,6 +1097,16 @@ MODULE_PARM_DESC(wbrf, "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); module_param_named(wbrf, amdgpu_wbrf, int, 0444); +/** + * DOC: rebar (int) + * Allow BAR resizing. Disable this to prevent the driver from attempting + * to resize the BAR if the GPU supports it and there is available MMIO space. + * Note that this just prevents the driver from resizing the BAR. The BIOS + * may have already resized the BAR at boot time. + */ +MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)"); +module_param_named(rebar, amdgpu_rebar, int, 0444); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ -- 2.51.0 From c6ae8d587eeb2d285ee13d064fd73ad0568198c4 Mon Sep 17 00:00:00 2001 From: Andres Urian Florez Date: Mon, 24 Mar 2025 19:07:21 -0500 Subject: [PATCH 06/16] drm/amdgpu: Replace deprecated function strcpy() with strscpy() Instead of using the strcpy() deprecated function to populate the fw_name, use the strscpy() function Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strcpy Signed-off-by: Andres Urian Florez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 68 ++++++++++++------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 68bce6a6d09d..525e53c94f4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -253,16 +253,16 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, if (!adev->pm.fw) { switch (adev->asic_type) { case CHIP_TAHITI: - strcpy(fw_name, "radeon/tahiti_smc.bin"); + strscpy(fw_name, "radeon/tahiti_smc.bin"); break; case CHIP_PITCAIRN: if ((adev->pdev->revision == 0x81) && ((adev->pdev->device == 0x6810) || (adev->pdev->device == 0x6811))) { info->is_kicker = true; - strcpy(fw_name, "radeon/pitcairn_k_smc.bin"); + strscpy(fw_name, "radeon/pitcairn_k_smc.bin"); } else { - strcpy(fw_name, "radeon/pitcairn_smc.bin"); + strscpy(fw_name, "radeon/pitcairn_smc.bin"); } break; case CHIP_VERDE: @@ -276,9 +276,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, ((adev->pdev->device == 0x6823) || (adev->pdev->device == 0x682b)))) { info->is_kicker = true; - strcpy(fw_name, "radeon/verde_k_smc.bin"); + strscpy(fw_name, "radeon/verde_k_smc.bin"); } else { - strcpy(fw_name, "radeon/verde_smc.bin"); + strscpy(fw_name, "radeon/verde_smc.bin"); } break; case CHIP_OLAND: @@ -290,9 +290,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, ((adev->pdev->revision == 0x83) && (adev->pdev->device == 0x6610))) { info->is_kicker = true; - strcpy(fw_name, "radeon/oland_k_smc.bin"); + strscpy(fw_name, "radeon/oland_k_smc.bin"); } else { - strcpy(fw_name, "radeon/oland_smc.bin"); + strscpy(fw_name, "radeon/oland_smc.bin"); } break; case CHIP_HAINAN: @@ -304,13 +304,13 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, (adev->pdev->device == 0x6665) || (adev->pdev->device == 0x6667)))) { info->is_kicker = true; - strcpy(fw_name, "radeon/hainan_k_smc.bin"); + strscpy(fw_name, "radeon/hainan_k_smc.bin"); } else if ((adev->pdev->revision == 0xc3) && (adev->pdev->device == 0x6665)) { info->is_kicker = true; - strcpy(fw_name, "radeon/banks_k_2_smc.bin"); + strscpy(fw_name, "radeon/banks_k_2_smc.bin"); } else { - strcpy(fw_name, "radeon/hainan_smc.bin"); + strscpy(fw_name, "radeon/hainan_smc.bin"); } break; case CHIP_BONAIRE: @@ -318,17 +318,17 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, (adev->pdev->revision == 0x81) || (adev->pdev->device == 0x665f)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/bonaire_k_smc.bin"); + strscpy(fw_name, "amdgpu/bonaire_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/bonaire_smc.bin"); + strscpy(fw_name, "amdgpu/bonaire_smc.bin"); } break; case CHIP_HAWAII: if (adev->pdev->revision == 0x80) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/hawaii_k_smc.bin"); + strscpy(fw_name, "amdgpu/hawaii_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/hawaii_smc.bin"); + strscpy(fw_name, "amdgpu/hawaii_smc.bin"); } break; case CHIP_TOPAZ: @@ -338,76 +338,76 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) || ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/topaz_k_smc.bin"); + strscpy(fw_name, "amdgpu/topaz_k_smc.bin"); } else - strcpy(fw_name, "amdgpu/topaz_smc.bin"); + strscpy(fw_name, "amdgpu/topaz_smc.bin"); break; case CHIP_TONGA: if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) || ((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/tonga_k_smc.bin"); + strscpy(fw_name, "amdgpu/tonga_k_smc.bin"); } else - strcpy(fw_name, "amdgpu/tonga_smc.bin"); + strscpy(fw_name, "amdgpu/tonga_smc.bin"); break; case CHIP_FIJI: - strcpy(fw_name, "amdgpu/fiji_smc.bin"); + strscpy(fw_name, "amdgpu/fiji_smc.bin"); break; case CHIP_POLARIS11: if (type == CGS_UCODE_ID_SMU) { if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris11_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_k_smc.bin"); } else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris11_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_smc.bin"); } } else if (type == CGS_UCODE_ID_SMU_SK) { - strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); + strscpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); } break; case CHIP_POLARIS10: if (type == CGS_UCODE_ID_SMU) { if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_k_smc.bin"); } else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris10_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_smc.bin"); } } else if (type == CGS_UCODE_ID_SMU_SK) { - strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); + strscpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); } break; case CHIP_POLARIS12: if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris12_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris12_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris12_smc.bin"); + strscpy(fw_name, "amdgpu/polaris12_smc.bin"); } break; case CHIP_VEGAM: - strcpy(fw_name, "amdgpu/vegam_smc.bin"); + strscpy(fw_name, "amdgpu/vegam_smc.bin"); break; case CHIP_VEGA10: if ((adev->pdev->device == 0x687f) && ((adev->pdev->revision == 0xc0) || (adev->pdev->revision == 0xc1) || (adev->pdev->revision == 0xc3))) - strcpy(fw_name, "amdgpu/vega10_acg_smc.bin"); + strscpy(fw_name, "amdgpu/vega10_acg_smc.bin"); else - strcpy(fw_name, "amdgpu/vega10_smc.bin"); + strscpy(fw_name, "amdgpu/vega10_smc.bin"); break; case CHIP_VEGA12: - strcpy(fw_name, "amdgpu/vega12_smc.bin"); + strscpy(fw_name, "amdgpu/vega12_smc.bin"); break; case CHIP_VEGA20: - strcpy(fw_name, "amdgpu/vega20_smc.bin"); + strscpy(fw_name, "amdgpu/vega20_smc.bin"); break; default: DRM_ERROR("SMC firmware not supported\n"); -- 2.51.0 From 3470f80bd36e9b3da2542a63c1b62114eecce01c Mon Sep 17 00:00:00 2001 From: Ananta Srikar Date: Mon, 24 Mar 2025 21:49:12 -0400 Subject: [PATCH 07/16] drm/amd/amdgpu: Fix typo Fixes a typo in the word "version" in an error message. Signed-off-by: Ananta Srikar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index ef9538fbbf53..c34a00500e18 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -649,7 +649,7 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, break; case MES_MISC_OP_CHANGE_CONFIG: if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) { - dev_err(mes->adev->dev, "MES FW versoin must be larger than 0x63 to support limit single process feature.\n"); + dev_err(mes->adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n"); return -EINVAL; } misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; -- 2.51.0 From 160d3d39f61cb92a6cabceebc2d226ed3c5ab4c3 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Fri, 21 Mar 2025 21:47:00 -0400 Subject: [PATCH 08/16] drm/amdgpu: continue cleaning up sid.h and si_enums.h Remove more duplicated defines and move some in sid.h for coherence with CIK. Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si_enums.h | 90 +-------------------------- drivers/gpu/drm/amd/amdgpu/sid.h | 16 ++++- 2 files changed, 14 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index aee3036be30e..6da65778292b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -23,96 +23,15 @@ #ifndef SI_ENUMS_H #define SI_ENUMS_H -#define VBLANK_ACK (1 << 4) -#define VLINE_ACK (1 << 4) - -#define CURSOR_WIDTH 64 -#define CURSOR_HEIGHT 64 - #define PRIORITY_MARK_MASK 0x7fff #define PRIORITY_OFF (1 << 16) #define PRIORITY_ALWAYS_ON (1 << 20) -#define GRPH_ENDIAN_NONE 0 -#define GRPH_ENDIAN_8IN16 1 -#define GRPH_ENDIAN_8IN32 2 -#define GRPH_ENDIAN_8IN64 3 -#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) -#define GRPH_RED_SEL_R 0 -#define GRPH_RED_SEL_G 1 -#define GRPH_RED_SEL_B 2 -#define GRPH_RED_SEL_A 3 -#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) -#define GRPH_GREEN_SEL_G 0 -#define GRPH_GREEN_SEL_B 1 -#define GRPH_GREEN_SEL_A 2 -#define GRPH_GREEN_SEL_R 3 -#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) -#define GRPH_BLUE_SEL_B 0 -#define GRPH_BLUE_SEL_A 1 -#define GRPH_BLUE_SEL_R 2 -#define GRPH_BLUE_SEL_G 3 -#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) -#define GRPH_ALPHA_SEL_A 0 -#define GRPH_ALPHA_SEL_R 1 -#define GRPH_ALPHA_SEL_G 2 -#define GRPH_ALPHA_SEL_B 3 - -#define GRPH_DEPTH_8BPP 0 -#define GRPH_DEPTH_16BPP 1 -#define GRPH_DEPTH_32BPP 2 - -#define GRPH_FORMAT_INDEXED 0 -#define GRPH_FORMAT_ARGB1555 0 -#define GRPH_FORMAT_ARGB565 1 -#define GRPH_FORMAT_ARGB4444 2 -#define GRPH_FORMAT_AI88 3 -#define GRPH_FORMAT_MONO16 4 -#define GRPH_FORMAT_BGRA5551 5 -#define GRPH_FORMAT_ARGB8888 0 -#define GRPH_FORMAT_ARGB2101010 1 -#define GRPH_FORMAT_32BPP_DIG 2 -#define GRPH_FORMAT_8B_ARGB2101010 3 -#define GRPH_FORMAT_BGRA1010102 4 -#define GRPH_FORMAT_8B_BGRA1010102 5 -#define GRPH_FORMAT_RGB111110 6 -#define GRPH_FORMAT_BGR101111 7 - -#define GRPH_ARRAY_LINEAR_GENERAL 0 -#define GRPH_ARRAY_LINEAR_ALIGNED 1 -#define GRPH_ARRAY_1D_TILED_THIN1 2 -#define GRPH_ARRAY_2D_TILED_THIN1 4 - -#define CURSOR_EN (1 << 0) -#define CURSOR_MODE(x) (((x) & 0x3) << 8) -#define CURSOR_MONO 0 -#define CURSOR_24_1 1 -#define CURSOR_24_8_PRE_MULT 2 -#define CURSOR_24_8_UNPRE_MULT 3 -#define CURSOR_2X_MAGNIFY (1 << 16) -#define CURSOR_FORCE_MC_ON (1 << 20) -#define CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24) -#define CURSOR_URGENT_ALWAYS 0 -#define CURSOR_URGENT_1_8 1 -#define CURSOR_URGENT_1_4 2 -#define CURSOR_URGENT_3_8 3 -#define CURSOR_URGENT_1_2 4 -#define CURSOR_UPDATE_PENDING (1 << 0) -#define CURSOR_UPDATE_TAKEN (1 << 1) -#define CURSOR_UPDATE_LOCK (1 << 16) -#define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) - - -#define ES_AND_GS_AUTO 3 -#define RADEON_PACKET_TYPE3 3 -#define CE_PARTITION_BASE 3 -#define BUF_SWAP_32BIT (2 << 16) - #define GFX_POWER_STATUS (1 << 1) #define GFX_CLOCK_STATUS (1 << 2) #define GFX_LS_STATUS (1 << 3) -#define RLC_BUSY_STATUS (1 << 0) +#define RLC_BUSY_STATUS (1 << 0) #define RLC_PUD(x) ((x) << 0) #define RLC_PUD_MASK (0xff << 0) #define RLC_PDD(x) ((x) << 8) @@ -121,13 +40,6 @@ #define RLC_TTPD_MASK (0xff << 16) #define RLC_MSD(x) ((x) << 24) #define RLC_MSD_MASK (0xff << 24) -#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) -#define WRITE_DATA_DST_SEL(x) ((x) << 8) -#define EVENT_TYPE(x) ((x) << 0) -#define EVENT_INDEX(x) ((x) << 8) -#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) -#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) -#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index fc61c2ea002f..2a43ede5dff4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -1592,6 +1592,7 @@ #define DMA_PACKET_POLL_REG_MEM 0xe #define DMA_PACKET_NOP 0xf +/* VCE */ #define VCE_STATUS 0x20004 #define VCE_VCPU_CNTL 0x20014 #define VCE_CLK_EN (1 << 0) @@ -1785,6 +1786,14 @@ #define EVERGREEN_VIEWPORT_SIZE 0x1b5d #define EVERGREEN_DESKTOP_HEIGHT 0x1ac1 +#define GRPH_ARRAY_LINEAR_GENERAL 0 +#define GRPH_ARRAY_LINEAR_ALIGNED 1 +#define GRPH_ARRAY_1D_TILED_THIN1 2 +#define GRPH_ARRAY_2D_TILED_THIN1 4 + +#define ES_AND_GS_AUTO 3 +#define BUF_SWAP_32BIT (2 << 16) + /* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */ #define EVERGREEN_CUR_CONTROL 0x1a66 # define EVERGREEN_CURSOR_EN (1 << 0) @@ -1899,9 +1908,10 @@ #define GRPH_FORMAT_RGB111110 6 #define GRPH_FORMAT_BGR101111 7 -#define ES_AND_GS_AUTO 3 -#define BUF_SWAP_32BIT (2 << 16) - +#define GRPH_ENDIAN_NONE 0 +#define GRPH_ENDIAN_8IN16 1 +#define GRPH_ENDIAN_8IN32 2 +#define GRPH_ENDIAN_8IN64 3 #define GRPH_RED_SEL_R 0 #define GRPH_RED_SEL_G 1 #define GRPH_RED_SEL_B 2 -- 2.51.0 From 8ae1a4eef78c09a61b62f85e2dfc128c7365d18d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Mar 2025 17:35:32 -0500 Subject: [PATCH 09/16] drm/amdgpu: add initial documentation for debugfs files Describes what debugfs files are available and what they are used for. v2: fix some typos (Mark Glines) v3: Address comments from Siqueira and Kent Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/debugfs.rst | 210 +++++++++++++++++++++++++ Documentation/gpu/amdgpu/debugging.rst | 7 + Documentation/gpu/amdgpu/index.rst | 1 + 3 files changed, 218 insertions(+) create mode 100644 Documentation/gpu/amdgpu/debugfs.rst diff --git a/Documentation/gpu/amdgpu/debugfs.rst b/Documentation/gpu/amdgpu/debugfs.rst new file mode 100644 index 000000000000..fe7736a0b43a --- /dev/null +++ b/Documentation/gpu/amdgpu/debugfs.rst @@ -0,0 +1,210 @@ +============== +AMDGPU DebugFS +============== + +The amdgpu driver provides a number of debugfs files to aid in debugging +issues in the driver. These are usually found in +/sys/kernel/debug/dri/. + +DebugFS Files +============= + +amdgpu_benchmark +---------------- + +Run benchmarks using the DMA engine the driver uses for GPU memory paging. +Write a number to the file to run the test. The results are written to the +kernel log. VRAM is on device memory (dGPUs) or cave out (APUs) and GTT +(Graphics Translation Tables) is system memory that is accessible by the GPU. +The following tests are available: + +- 1: simple test, VRAM to GTT and GTT to VRAM +- 2: simple test, VRAM to VRAM +- 3: GTT to VRAM, buffer size sweep, powers of 2 +- 4: VRAM to GTT, buffer size sweep, powers of 2 +- 5: VRAM to VRAM, buffer size sweep, powers of 2 +- 6: GTT to VRAM, buffer size sweep, common display sizes +- 7: VRAM to GTT, buffer size sweep, common display sizes +- 8: VRAM to VRAM, buffer size sweep, common display sizes + +amdgpu_test_ib +-------------- + +Read this file to run simple IB (Indirect Buffer) tests on all kernel managed +rings. IBs are command buffers usually generated by userspace applications +which are submitted to the kernel for execution on an particular GPU engine. +This just runs the simple IB tests included in the kernel. These tests +are engine specific and verify that IB submission works. + +amdgpu_discovery +---------------- + +Provides raw access to the IP discovery binary provided by the GPU. Read this +file to access the raw binary. This is useful for verifying the contents of +the IP discovery table. It is chip specific. + +amdgpu_vbios +------------ + +Provides raw access to the ROM binary image from the GPU. Read this file to +access the raw binary. This is useful for verifying the contents of the +video BIOS ROM. It is board specific. + +amdgpu_evict_gtt +---------------- + +Evict all buffers from the GTT memory pool. Read this file to evict all +buffers from this pool. + +amdgpu_evict_vram +----------------- + +Evict all buffers from the VRAM memory pool. Read this file to evict all +buffers from this pool. + +amdgpu_gpu_recover +------------------ + +Trigger a GPU reset. Read this file to trigger reset the entire GPU. +All work currently running on the GPU will be lost. + +amdgpu_ring_ +------------------ + +Provides read access to the kernel managed ring buffers for each ring . +These are useful for debugging problems on a particular ring. The ring buffer +is how the CPU sends commands to the GPU. The CPU writes commands into the +buffer and then asks the GPU engine to process it. This is the raw binary +contents of the ring buffer. Use a tool like UMR to decode the rings into human +readable form. + +amdgpu_mqd_ +----------------- + +Provides read access to the kernel managed MQD (Memory Queue Descriptor) for +ring managed by the kernel driver. MQDs define the features of the ring +and are used to store the ring's state when it is not connected to hardware. +The driver writes the requested ring features and metadata (GPU addresses of +the ring itself and associated buffers) to the MQD and the firmware uses the MQD +to populate the hardware when the ring is mapped to a hardware slot. Only +available on engines which use MQDs. This provides access to the raw MQD +binary. + +amdgpu_error_ +------------------- + +Provides an interface to set an error code on the dma fences associated with +ring . The error code specified is propogated to all fences associated +with the ring. Use this to inject a fence error into a ring. + +amdgpu_pm_info +-------------- + +Provides human readable information about the power management features +and state of the GPU. This includes current GFX clock, Memory clock, +voltages, average SoC power, temperature, GFX load, Memory load, SMU +feature mask, VCN power state, clock and power gating features. + +amdgpu_firmware_info +-------------------- + +Lists the firmware versions for all firmwares used by the GPU. Only +entries with a non-0 version are valid. If the version is 0, the firmware +is not valid for the GPU. + +amdgpu_fence_info +----------------- + +Shows the last signalled and emitted fence sequence numbers for each +kernel driver managed ring. Fences are associated with submissions +to the engine. Emitted fences have been submitted to the ring +and signalled fences have been signalled by the GPU. Rings with a +larger emitted fence value have outstanding work that is still being +processed by the engine that owns that ring. When the emitted and +signalled fence values are equal, the ring is idle. + +amdgpu_gem_info +--------------- + +Lists all of the PIDs using the GPU and the GPU buffers that they have +allocated. This lists the buffer size, pool (VRAM, GTT, etc.), and buffer +attributes (CPU access required, CPU cache attributes, etc.). + +amdgpu_vm_info +-------------- + +Lists all of the PIDs using the GPU and the GPU buffers that they have +allocated as well as the status of those buffers relative to that process' +GPU virtual address space (e.g., evicted, idle, invalidated, etc.). + +amdgpu_sa_info +-------------- + +Prints out all of the suballocations (sa) by the suballocation manager in the +kernel driver. Prints the GPU address, size, and fence info associated +with each suballocation. The suballocations are used internally within +the kernel driver for various things. + +amdgpu__mm +---------------- + +Prints TTM information about the memory pool . + +amdgpu_vram +----------- + +Provides direct access to VRAM. Used by tools like UMR to inspect +objects in VRAM. + +amdgpu_iomem +------------ + +Provides direct access to GTT memory. Used by tools like UMR to inspect +GTT memory. + +amdgpu_regs_* +------------- + +Provides direct access to various register aperatures on the GPU. Used +by tools like UMR to access GPU registers. + +amdgpu_regs2 +------------ + +Provides an IOCTL interface used by UMR for interacting with GPU registers. + + +amdgpu_sensors +-------------- + +Provides an interface to query GPU power metrics (temperature, average +power, etc.). Used by tools like UMR to query GPU power metrics. + + +amdgpu_gca_config +----------------- + +Provides an interface to query GPU details (Graphics/Compute Array config, +PCI config, GPU family, etc.). Used by tools like UMR to query GPU details. + +amdgpu_wave +----------- + +Used to query GFX/compute wave information from the hardware. Used by tools +like UMR to query GFX/compute wave information. + +amdgpu_gpr +---------- + +Used to query GFX/compute GPR (General Purpose Register) information from the +hardware. Used by tools like UMR to query GPRs when debugging shaders. + +amdgpu_gprwave +-------------- + +Provides an IOCTL interface used by UMR for interacting with shader waves. + +amdgpu_fw_attestation +--------------------- + +Provides an interface for reading back firmware attestation records. diff --git a/Documentation/gpu/amdgpu/debugging.rst b/Documentation/gpu/amdgpu/debugging.rst index e75f97d0e4ea..7cbfea0606e1 100644 --- a/Documentation/gpu/amdgpu/debugging.rst +++ b/Documentation/gpu/amdgpu/debugging.rst @@ -2,6 +2,13 @@ GPU Debugging =============== +General Debugging Options +========================= + +The DebugFS section provides documentation on a number files to aid in debugging +issues on the GPU. + + GPUVM Debugging =============== diff --git a/Documentation/gpu/amdgpu/index.rst b/Documentation/gpu/amdgpu/index.rst index 302d039928ee..4c75567854cb 100644 --- a/Documentation/gpu/amdgpu/index.rst +++ b/Documentation/gpu/amdgpu/index.rst @@ -16,5 +16,6 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures. thermal driver-misc debugging + debugfs process-isolation amdgpu-glossary -- 2.51.0 From 60d4952d8908c2396e5a005e056423c578e29db6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Mar 2025 10:07:50 -0400 Subject: [PATCH 10/16] drm/amdgpu: drop some dead code Drop the cgs smu firmware code for SI, it's not used. The smu firmware fetching for SI is done in si_dpm.c. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 61 ------------------------- 1 file changed, 61 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 525e53c94f4f..004a6a9d6b9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -252,67 +252,6 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, if (!adev->pm.fw) { switch (adev->asic_type) { - case CHIP_TAHITI: - strscpy(fw_name, "radeon/tahiti_smc.bin"); - break; - case CHIP_PITCAIRN: - if ((adev->pdev->revision == 0x81) && - ((adev->pdev->device == 0x6810) || - (adev->pdev->device == 0x6811))) { - info->is_kicker = true; - strscpy(fw_name, "radeon/pitcairn_k_smc.bin"); - } else { - strscpy(fw_name, "radeon/pitcairn_smc.bin"); - } - break; - case CHIP_VERDE: - if (((adev->pdev->device == 0x6820) && - ((adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83))) || - ((adev->pdev->device == 0x6821) && - ((adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0x87))) || - ((adev->pdev->revision == 0x87) && - ((adev->pdev->device == 0x6823) || - (adev->pdev->device == 0x682b)))) { - info->is_kicker = true; - strscpy(fw_name, "radeon/verde_k_smc.bin"); - } else { - strscpy(fw_name, "radeon/verde_smc.bin"); - } - break; - case CHIP_OLAND: - if (((adev->pdev->revision == 0x81) && - ((adev->pdev->device == 0x6600) || - (adev->pdev->device == 0x6604) || - (adev->pdev->device == 0x6605) || - (adev->pdev->device == 0x6610))) || - ((adev->pdev->revision == 0x83) && - (adev->pdev->device == 0x6610))) { - info->is_kicker = true; - strscpy(fw_name, "radeon/oland_k_smc.bin"); - } else { - strscpy(fw_name, "radeon/oland_smc.bin"); - } - break; - case CHIP_HAINAN: - if (((adev->pdev->revision == 0x81) && - (adev->pdev->device == 0x6660)) || - ((adev->pdev->revision == 0x83) && - ((adev->pdev->device == 0x6660) || - (adev->pdev->device == 0x6663) || - (adev->pdev->device == 0x6665) || - (adev->pdev->device == 0x6667)))) { - info->is_kicker = true; - strscpy(fw_name, "radeon/hainan_k_smc.bin"); - } else if ((adev->pdev->revision == 0xc3) && - (adev->pdev->device == 0x6665)) { - info->is_kicker = true; - strscpy(fw_name, "radeon/banks_k_2_smc.bin"); - } else { - strscpy(fw_name, "radeon/hainan_smc.bin"); - } - break; case CHIP_BONAIRE: if ((adev->pdev->revision == 0x80) || (adev->pdev->revision == 0x81) || -- 2.51.0 From 9eab24532691a36ce795f192ea2fb3193bc5c4b3 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 26 Mar 2025 12:53:01 +0530 Subject: [PATCH 11/16] drm/amdgpu/gfx10: Add Cleaner Shader Support for GFX10.3.x GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Enable the cleaner shader for other GFX10.3.x series of GPUs to provide data isolation between GPU workloads. The cleaner shader is responsible for clearing the Local Data Store (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs), which helps prevent data leakage and ensures accurate computation results. This update extends cleaner shader support to GFX10.3.x GPUs, previously available for GFX10.3.0. It enhances security by clearing GPU memory between processes and maintains a consistent GPU state across KGD and KFD workloads. Cc: Mario Sopena-Novales Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a63ce747863f..d1c04de0f633 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4810,7 +4810,9 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } break; case IP_VERSION(10, 3, 0): + case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 2): + case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; @@ -4826,6 +4828,34 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(10, 3, 6): + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 14 && + adev->gfx.pfp_fw_version >= 17 && + adev->gfx.mec_fw_version >= 24) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(10, 3, 7): + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 4 && + adev->gfx.pfp_fw_version >= 9 && + adev->gfx.mec_fw_version >= 12) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; -- 2.51.0 From 0d47bb77b505e539a99f4f032c9c5a58f1d21e18 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 13:34:33 -0400 Subject: [PATCH 12/16] drm/amdgpu/gfx: make amdgpu_gfx_me_queue_to_bit() static It's not used outside of amdgpu_gfx.c. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 72af5e5a894a..04982b7f33a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -74,8 +74,8 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, adev->gfx.mec_bitmap[xcc_id].queue_bitmap); } -int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, - int me, int pipe, int queue) +static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, + int me, int pipe, int queue) { int bit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 87e862188766..5d70b3ae3fe1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -550,8 +550,6 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); -int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, - int pipe, int queue); bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); -- 2.51.0 From 8f970c46b56235a3965e0965fa3fd60e7567fecc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 14:10:17 -0400 Subject: [PATCH 13/16] drm/amdgpu/gfx: decouple the number of kgqs from the hw The driver currently sets up one kgq per pipe. As such adev->gfx.me.num_queue_per_pipe is hardcoded to 1 everywhere. This is fine for kernel queues, but when we enable user queues we need to know that actual number of queues per pipe. Decouple the kgq setup from the actual hardware count. For dev core dumps and user queues, we want to know the actual number of queues per pipe. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 13 +++++++------ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 3 ++- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 04982b7f33a8..f64675b2ab75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -77,11 +77,12 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, int pipe, int queue) { + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ int bit = 0; bit += me * adev->gfx.me.num_pipe_per_me - * adev->gfx.me.num_queue_per_pipe; - bit += pipe * adev->gfx.me.num_queue_per_pipe; + * num_queue_per_pipe; + bit += pipe * num_queue_per_pipe; bit += queue; return bit; @@ -238,8 +239,8 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe; bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev); - int max_queues_per_me = adev->gfx.me.num_pipe_per_me * - adev->gfx.me.num_queue_per_pipe; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ + int max_queues_per_me = adev->gfx.me.num_pipe_per_me * num_queue_per_pipe; if (multipipe_policy) { /* policy: amdgpu owns the first queue per pipe at this stage @@ -247,9 +248,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) for (i = 0; i < max_queues_per_me; i++) { pipe = i % adev->gfx.me.num_pipe_per_me; queue = (i / adev->gfx.me.num_pipe_per_me) % - adev->gfx.me.num_queue_per_pipe; + num_queue_per_pipe; - set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue, + set_bit(pipe * num_queue_per_pipe + queue, adev->gfx.me.queue_bitmap); } } else { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d1c04de0f633..cba9b973f0a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4752,6 +4752,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) int i, j, k, r, ring_id = 0; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -4916,7 +4917,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) /* set up the gfx ring */ for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (j = 0; j < num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d57db42f9536..0c9b28a46050 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1571,6 +1571,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) int i, j, k, r, ring_id = 0; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -1703,7 +1704,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) /* set up the gfx ring */ for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (j = 0; j < num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index e7b58e470292..7b20ab48f762 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1346,6 +1346,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) unsigned num_compute_rings; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -1435,7 +1436,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) /* set up the gfx ring */ for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (j = 0; j < num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) continue; -- 2.51.0 From 9cffd67e803a081ce548488161c69f2cddb97fe7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 14:24:47 -0400 Subject: [PATCH 14/16] drm/amdgpu/gfx: assign the actual me0 queues per pipe Set the actual number of queues per pipe for ME0 (gfx). This way we will dump all of the queues properly in dev core dumps. Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index cba9b973f0a7..d9c2dab17f6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4764,7 +4764,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(10, 1, 4): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 8; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -4779,7 +4779,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 2; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0c9b28a46050..c78458ecb88b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1581,7 +1581,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; @@ -1594,7 +1594,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 7b20ab48f762..ddac26b012bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1355,7 +1355,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(12, 0, 1): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 8; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; -- 2.51.0 From 8307ebc15c1ea98a8a0b7837af1faa6c01514577 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Mar 2025 11:56:02 -0400 Subject: [PATCH 15/16] drm/amdgpu/gfx6: fix CSIB handling We shouldn't return after the last section. We need to update the rest of the CSIB. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 9c6453b458b0..d86620f38855 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -2881,8 +2881,6 @@ static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000); for (i = 0; i < ext->reg_count; i++) buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; } } } -- 2.51.0 From be7652c23d833d1ab2c67b16e173b1a4e69d1ae6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Mar 2025 11:57:19 -0400 Subject: [PATCH 16/16] drm/amdgpu/gfx7: fix CSIB handling We shouldn't return after the last section. We need to update the rest of the CSIB. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 6eb48ebd3f4e..0fdc4362bbc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3909,8 +3909,6 @@ static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START); for (i = 0; i < ext->reg_count; i++) buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; } } } -- 2.51.0