* Guts of ath10k_ce_completed_recv_next.
  * The caller takes responsibility for any necessary locking.
  */
-static int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
-                                               void **per_transfer_contextp,
-                                               u32 *bufferp,
-                                               unsigned int *nbytesp,
-                                               unsigned int *transfer_idp,
-                                               unsigned int *flagsp)
+int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
+                                        void **per_transfer_contextp,
+                                        u32 *bufferp,
+                                        unsigned int *nbytesp,
+                                        unsigned int *transfer_idp,
+                                        unsigned int *flagsp)
 {
        struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
        unsigned int nentries_mask = dest_ring->nentries_mask;
  * Guts of ath10k_ce_completed_send_next.
  * The caller takes responsibility for any necessary locking.
  */
-static int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
-                                               void **per_transfer_contextp,
-                                               u32 *bufferp,
-                                               unsigned int *nbytesp,
-                                               unsigned int *transfer_idp)
+int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
+                                        void **per_transfer_contextp,
+                                        u32 *bufferp,
+                                        unsigned int *nbytesp,
+                                        unsigned int *transfer_idp)
 {
        struct ath10k_ce_ring *src_ring = ce_state->src_ring;
        u32 ctrl_addr = ce_state->ctrl_addr;
 
                                  unsigned int *nbytesp,
                                  unsigned int *transfer_idp);
 
+/*
+ * Guts of ath10k_ce_completed_send_next.
+ * The caller takes responsibility for any necessary locking.
+ */
+int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
+                                        void **per_transfer_contextp,
+                                        u32 *bufferp,
+                                        unsigned int *nbytesp,
+                                        unsigned int *transfer_idp);
+
 /*==================CE Engine Initialization=======================*/
 
 int ath10k_ce_init_pipe(struct ath10k *ar, unsigned int ce_id,
                               void **per_transfer_contextp,
                               u32 *bufferp);
 
+/*
+ * Guts of ath10k_ce_completed_recv_next.
+ * The caller takes responsibility for any necessary locking.
+ */
+int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
+                                        void **per_transfer_contextp,
+                                        u32 *bufferp,
+                                        unsigned int *nbytesp,
+                                        unsigned int *transfer_idp,
+                                        unsigned int *flagsp);
+
 /*
  * Support clean shutdown by allowing the caller to cancel
  * pending sends.  Target DMA must be stopped before using
 
        int (*tx_sg)(struct ath10k *ar, u8 pipe_id,
                     struct ath10k_hif_sg_item *items, int n_items);
 
+       /* read firmware memory through the diagnostic interface */
+       int (*diag_read)(struct ath10k *ar, u32 address, void *buf,
+                        size_t buf_len);
+
        /*
         * API to handle HIF-specific BMI message exchanges, this API is
         * synchronous and only allowed to be called from a context that
        return ar->hif.ops->tx_sg(ar, pipe_id, items, n_items);
 }
 
+/*
+ * Read buf_len bytes of firmware memory starting at address into buf by
+ * forwarding the request to the HIF backend's diag_read op.
+ *
+ * Returns the op's return value, or -EOPNOTSUPP when the backend's ops
+ * table leaves diag_read unset.
+ */
+static inline int ath10k_hif_diag_read(struct ath10k *ar, u32 address, void *buf,
+                                      size_t buf_len)
+{
+       /* A backend may not provide diag_read; never call through NULL. */
+       if (!ar->hif.ops->diag_read)
+               return -EOPNOTSUPP;
+
+       return ar->hif.ops->diag_read(ar, address, buf, buf_len);
+}
+
 static inline int ath10k_hif_exchange_bmi_msg(struct ath10k *ar,
                                              void *request, u32 request_len,
                                              void *response, u32 *response_len)
 
        void *data_buf = NULL;
        int i;
 
+       spin_lock_bh(&ar_pci->ce_lock);
+
        ce_diag = ar_pci->ce_diag;
 
        /*
                nbytes = min_t(unsigned int, remaining_bytes,
                               DIAG_TRANSFER_LIMIT);
 
-               ret = ath10k_ce_rx_post_buf(ce_diag, NULL, ce_data);
+               ret = __ath10k_ce_rx_post_buf(ce_diag, NULL, ce_data);
                if (ret != 0)
                        goto done;
 
                address = TARG_CPU_SPACE_TO_CE_SPACE(ar, ar_pci->mem,
                                                     address);
 
-               ret = ath10k_ce_send(ce_diag, NULL, (u32)address, nbytes, 0,
-                                    0);
+               ret = ath10k_ce_send_nolock(ce_diag, NULL, (u32)address, nbytes, 0,
+                                           0);
                if (ret)
                        goto done;
 
                i = 0;
-               while (ath10k_ce_completed_send_next(ce_diag, NULL, &buf,
-                                                    &completed_nbytes,
-                                                    &id) != 0) {
+               while (ath10k_ce_completed_send_next_nolock(ce_diag, NULL, &buf,
+                                                           &completed_nbytes,
+                                                           &id) != 0) {
                        mdelay(1);
                        if (i++ > DIAG_ACCESS_CE_TIMEOUT_MS) {
                                ret = -EBUSY;
                }
 
                i = 0;
-               while (ath10k_ce_completed_recv_next(ce_diag, NULL, &buf,
-                                                    &completed_nbytes,
-                                                    &id, &flags) != 0) {
+               while (ath10k_ce_completed_recv_next_nolock(ce_diag, NULL, &buf,
+                                                           &completed_nbytes,
+                                                           &id, &flags) != 0) {
                        mdelay(1);
 
                        if (i++ > DIAG_ACCESS_CE_TIMEOUT_MS) {
                dma_free_coherent(ar->dev, orig_nbytes, data_buf,
                                  ce_data_base);
 
+       spin_unlock_bh(&ar_pci->ce_lock);
+
        return ret;
 }
 
        dma_addr_t ce_data_base = 0;
        int i;
 
+       spin_lock_bh(&ar_pci->ce_lock);
+
        ce_diag = ar_pci->ce_diag;
 
        /*
                nbytes = min_t(int, remaining_bytes, DIAG_TRANSFER_LIMIT);
 
                /* Set up to receive directly into Target(!) address */
-               ret = ath10k_ce_rx_post_buf(ce_diag, NULL, address);
+               ret = __ath10k_ce_rx_post_buf(ce_diag, NULL, address);
                if (ret != 0)
                        goto done;
 
                 * Request CE to send caller-supplied data that
                 * was copied to bounce buffer to Target(!) address.
                 */
-               ret = ath10k_ce_send(ce_diag, NULL, (u32)ce_data,
-                                    nbytes, 0, 0);
+               ret = ath10k_ce_send_nolock(ce_diag, NULL, (u32)ce_data,
+                                           nbytes, 0, 0);
                if (ret != 0)
                        goto done;
 
                i = 0;
-               while (ath10k_ce_completed_send_next(ce_diag, NULL, &buf,
-                                                    &completed_nbytes,
-                                                    &id) != 0) {
+               while (ath10k_ce_completed_send_next_nolock(ce_diag, NULL, &buf,
+                                                           &completed_nbytes,
+                                                           &id) != 0) {
                        mdelay(1);
 
                        if (i++ > DIAG_ACCESS_CE_TIMEOUT_MS) {
                }
 
                i = 0;
-               while (ath10k_ce_completed_recv_next(ce_diag, NULL, &buf,
-                                                    &completed_nbytes,
-                                                    &id, &flags) != 0) {
+               while (ath10k_ce_completed_recv_next_nolock(ce_diag, NULL, &buf,
+                                                           &completed_nbytes,
+                                                           &id, &flags) != 0) {
                        mdelay(1);
 
                        if (i++ > DIAG_ACCESS_CE_TIMEOUT_MS) {
                ath10k_warn(ar, "failed to write diag value at 0x%x: %d\n",
                            address, ret);
 
+       spin_unlock_bh(&ar_pci->ce_lock);
+
        return ret;
 }
 
        return err;
 }
 
+/* PCI implementation of the diag_read HIF op: hands off to ath10k_pci_diag_read_mem(). */
+static int ath10k_pci_hif_diag_read(struct ath10k *ar, u32 address, void *buf,
+                                   size_t buf_len)
+{
+       int ret;
+
+       ret = ath10k_pci_diag_read_mem(ar, address, buf, buf_len);
+
+       return ret;
+}
+
 static u16 ath10k_pci_hif_get_free_queue_number(struct ath10k *ar, u8 pipe)
 {
        struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 
 static const struct ath10k_hif_ops ath10k_pci_hif_ops = {
        .tx_sg                  = ath10k_pci_hif_tx_sg,
+       .diag_read              = ath10k_pci_hif_diag_read,
        .exchange_bmi_msg       = ath10k_pci_hif_exchange_bmi_msg,
        .start                  = ath10k_pci_hif_start,
        .stop                   = ath10k_pci_hif_stop,