#include <linux/device.h>
 #include <linux/mm.h>
 #include <linux/kthread.h>
+#include <linux/delay.h>
 #include "../common/sst-dsp.h"
 #include "../common/sst-dsp-priv.h"
 
                        SKL_ADSP_REG_ADSPIC, SKL_ADSPIC_CL_DMA, 0);
 }
 
+/*
+ * Set or clear the Run field of the code loader stream and wait for the
+ * hardware to reflect the new state.
+ *
+ * Writes CL_SD_CTL_RUN(enable) under CL_SD_CTL_RUN_MASK into
+ * SKL_ADSP_REG_CL_SD_CTL, then reads the register back up to 300 times
+ * with udelay(3) between reads until the Run field matches @enable.
+ * If it never matches, an error is logged; nothing is returned.
+ */
+static void skl_cldma_stream_run(struct sst_dsp  *ctx, bool enable)
+{
+       unsigned char run;
+       int retries;
+
+       sst_dsp_shim_update_bits_unlocked(ctx, SKL_ADSP_REG_CL_SD_CTL,
+                       CL_SD_CTL_RUN_MASK, CL_SD_CTL_RUN(enable));
+
+       /* short delay before the first read-back */
+       udelay(3);
+
+       for (retries = 300; retries; retries--) {
+               run = sst_dsp_shim_read(ctx, SKL_ADSP_REG_CL_SD_CTL) &
+                       CL_SD_CTL_RUN_MASK;
+
+               /* done once Run reads as set (enable) or clear (!enable) */
+               if (!!run == enable)
+                       break;
+
+               udelay(3);
+       }
+
+       /* the counter only reaches zero when every read-back mismatched */
+       if (!retries)
+               dev_err(ctx->dev, "Failed to set Run bit=%d enable=%d\n", run, enable);
+}
+
 /* Code loader helper APIs */
 static void skl_cldma_setup_bdle(struct sst_dsp *ctx,
                struct snd_dma_buffer *dmab_data,
        sst_dsp_shim_write_unlocked(ctx, SKL_ADSP_REG_CL_SPBFIFO_SPIB, 0);
 }
 
-static void skl_cldma_trigger(struct sst_dsp  *ctx, bool enable)
-{
-       if (enable)
-               sst_dsp_shim_update_bits_unlocked(ctx,
-                       SKL_ADSP_REG_CL_SD_CTL,
-                       CL_SD_CTL_RUN_MASK, CL_SD_CTL_RUN(1));
-       else
-               sst_dsp_shim_update_bits_unlocked(ctx,
-                       SKL_ADSP_REG_CL_SD_CTL,
-                       CL_SD_CTL_RUN_MASK, CL_SD_CTL_RUN(0));
-}
-
 static void skl_cldma_cleanup(struct sst_dsp  *ctx)
 {
        skl_cldma_cleanup_spb(ctx);
 
 static void skl_cldma_stop(struct sst_dsp *ctx)
 {
-       ctx->cl_dev.ops.cl_trigger(ctx, false);
+       skl_cldma_stream_run(ctx, false); /* clear Run bit, then poll its read-back */
 }
 
 static void skl_cldma_fill_buffer(struct sst_dsp *ctx, unsigned int size,
        ctx->cl_dev.ops.cl_setup_controller = skl_cldma_setup_controller;
        ctx->cl_dev.ops.cl_setup_spb = skl_cldma_setup_spb;
        ctx->cl_dev.ops.cl_cleanup_spb = skl_cldma_cleanup_spb;
-       ctx->cl_dev.ops.cl_trigger = skl_cldma_trigger;
+       ctx->cl_dev.ops.cl_trigger = skl_cldma_stream_run;
        ctx->cl_dev.ops.cl_cleanup_controller = skl_cldma_cleanup;
        ctx->cl_dev.ops.cl_copy_to_dmabuf = skl_cldma_copy_to_buf;
        ctx->cl_dev.ops.cl_stop_dma = skl_cldma_stop;