ndelay(cqspi->wr_delay);
 
        while (remaining > 0) {
+               size_t write_words, mod_bytes;
+
                write_bytes = remaining > page_size ? page_size : remaining;
-               iowrite32_rep(cqspi->ahb_base, txbuf,
-                             DIV_ROUND_UP(write_bytes, 4));
+               write_words = write_bytes / 4;
+               mod_bytes = write_bytes % 4;
+               /* Write 4 bytes at a time then single bytes. */
+               if (write_words) {
+                       iowrite32_rep(cqspi->ahb_base, txbuf, write_words);
+                       txbuf += (write_words * 4);
+               }
+               if (mod_bytes) {
+                       unsigned int temp = 0xFFFFFFFF;
+
+                       memcpy(&temp, txbuf, mod_bytes);
+                       iowrite32(temp, cqspi->ahb_base);
+                       txbuf += mod_bytes;
+               }
 
                if (!wait_for_completion_timeout(&cqspi->transfer_complete,
                                        msecs_to_jiffies(CQSPI_TIMEOUT_MS))) {
                        goto failwr;
                }
 
-               txbuf += write_bytes;
                remaining -= write_bytes;
 
                if (remaining > 0)