{
        struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
        u32 doorbell_offset, doorbell;
-       u32 rb_cntl, ib_cntl;
+       u32 rb_cntl, ib_cntl, sdma_cntl;
        int i;
 
        for_each_inst(i, inst_mask) {
                ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
                ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0);
                WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
+               sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL);
+               sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0);
+               WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl);
 
                if (sdma[i]->use_doorbell) {
                        doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
                /* set utc l1 enable flag always to 1 */
                temp = RREG32_SDMA(i, regSDMA_CNTL);
                temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
+               WREG32_SDMA(i, regSDMA_CNTL, temp);
 
                if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) {
                        /* enable context empty interrupt during initialization */