*                             by upper layers directly
  * @buf_size/count/start:      markers for chip->read_buf/write_buf functions
  * @reg_read_buf:              local buffer for reading back registers via DMA
+ * @reg_read_dma:              DMA address of the register read buffer
  * @reg_read_pos:              marker for data read in reg_read_buf
  *
  * @regs:                      a contiguous chunk of memory for DMA register
        int             buf_start;
 
        __le32 *reg_read_buf;
+       dma_addr_t reg_read_dma;
        int reg_read_pos;
 
        struct nandc_regs *regs;
        iowrite32(val, nandc->base + offset);
 }
 
+/*
+ * Synchronise the DMA-mapped register read buffer. Nothing is done unless
+ * props->is_bam is set. When @is_cpu is true the buffer is synced for the
+ * CPU, otherwise it is synced for the device.
+ */
+static inline void nandc_read_buffer_sync(struct qcom_nand_controller *nandc,
+                                         bool is_cpu)
+{
+       /* whole register read buffer: MAX_REG_RD entries */
+       const size_t len = MAX_REG_RD * sizeof(*nandc->reg_read_buf);
+
+       if (nandc->props->is_bam) {
+               if (!is_cpu)
+                       dma_sync_single_for_device(nandc->dev,
+                                                  nandc->reg_read_dma, len,
+                                                  DMA_FROM_DEVICE);
+               else
+                       dma_sync_single_for_cpu(nandc->dev,
+                                               nandc->reg_read_dma, len,
+                                               DMA_FROM_DEVICE);
+       }
+}
+
 static __le32 *offset_to_nandc_reg(struct nandc_regs *regs, int offset)
 {
        switch (offset) {
 static void clear_read_regs(struct qcom_nand_controller *nandc)
 {
+       /* restart from the first entry of reg_read_buf */
        nandc->reg_read_pos = 0;
+       /* sync the register read buffer for the device (no-op unless BAM) */
+       nandc_read_buffer_sync(nandc, false);
 }
 
 static void pre_command(struct qcom_nand_host *host, int command)
        int i;
 
        num_cw = command == NAND_CMD_PAGEPROG ? ecc->steps : 1;
+       nandc_read_buffer_sync(nandc, true);
 
        for (i = 0; i < num_cw; i++) {
                u32 flash_status = le32_to_cpu(nandc->reg_read_buf[i]);
 
        switch (command) {
        case NAND_CMD_READID:
+               nandc_read_buffer_sync(nandc, true);
                memcpy(nandc->data_buffer, nandc->reg_read_buf,
                       nandc->buf_count);
                break;
        int i;
 
        buf = (struct read_stats *)nandc->reg_read_buf;
+       nandc_read_buffer_sync(nandc, true);
 
        for (i = 0; i < ecc->steps; i++, buf++) {
                u32 flash, buffer, erased_cw;
                return -ENOMEM;
 
        if (nandc->props->is_bam) {
+               nandc->reg_read_dma =
+                       dma_map_single(nandc->dev, nandc->reg_read_buf,
+                                      MAX_REG_RD *
+                                      sizeof(*nandc->reg_read_buf),
+                                      DMA_FROM_DEVICE);
+               if (dma_mapping_error(nandc->dev, nandc->reg_read_dma)) {
+                       dev_err(nandc->dev, "failed to DMA MAP reg buffer\n");
+                       return -EIO;
+               }
+
                nandc->tx_chan = dma_request_slave_channel(nandc->dev, "tx");
                if (!nandc->tx_chan) {
                        dev_err(nandc->dev, "failed to request tx channel\n");
 static void qcom_nandc_unalloc(struct qcom_nand_controller *nandc)
 {
        if (nandc->props->is_bam) {
+               if (!dma_mapping_error(nandc->dev, nandc->reg_read_dma))
+                       dma_unmap_single(nandc->dev, nandc->reg_read_dma,
+                                        MAX_REG_RD *
+                                        sizeof(*nandc->reg_read_buf),
+                                        DMA_FROM_DEVICE);
+
                if (nandc->tx_chan)
                        dma_release_channel(nandc->tx_chan);