u32 sq_stack_resource_mgmt_2;
        u32 sq_stack_resource_mgmt_3;
        u32 vgt_cache_invalidation;
-       u32 hdp_host_path_cntl;
+       u32 hdp_host_path_cntl, tmp;
        int i, j, num_shader_engines, ps_thread_count;
 
        switch (rdev->family) {
        for (i = SQ_ALU_CONST_BUFFER_SIZE_HS_0; i < 0x29000; i += 4)
                WREG32(i, 0);
 
+       tmp = RREG32(HDP_MISC_CNTL);
+       tmp |= HDP_FLUSH_INVALIDATE_CACHE;
+       WREG32(HDP_MISC_CNTL, tmp);
+
        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
 
 
 #define GB_BACKEND_MAP                                 0x98FC
 #define DMIF_ADDR_CONFIG                               0xBD4
 #define HDP_ADDR_CONFIG                                0x2F48
+#define HDP_MISC_CNTL                                          0x2F4C
+#define                HDP_FLUSH_INVALIDATE_CACHE              (1 << 0)
 
 #define        CC_SYS_RB_BACKEND_DISABLE                       0x3F88
 #define        GC_USER_RB_BACKEND_DISABLE                      0x9B7C