#include <linux/pinctrl/consumer.h>
 #include <linux/sizes.h>
 #include <linux/io.h>
+#include <linux/workqueue.h>
 
 #define UART_NR                        14
 
        unsigned int            lcrh_tx;        /* vendor-specific */
        unsigned int            lcrh_rx;        /* vendor-specific */
        unsigned int            old_cr;         /* state during shutdown */
+       struct delayed_work     tx_softirq_work;
        bool                    autorts;
+       unsigned int            tx_irq_seen;    /* 0=none, 1=1, 2=2 or more */
        char                    type[12];
 #ifdef CONFIG_DMA_ENGINE
        /* DMA stuff */
                dma_release_channel(uap->dmarx.chan);
 }
 
-/* Forward declare this for the refill routine */
+/* Forward declare these for the refill routine */
 static int pl011_dma_tx_refill(struct uart_amba_port *uap);
+static void pl011_start_tx_pio(struct uart_amba_port *uap);
 
 /*
  * The current DMA TX buffer has been sent.
                return;
        }
 
-       if (pl011_dma_tx_refill(uap) <= 0) {
+       if (pl011_dma_tx_refill(uap) <= 0)
                /*
                 * We didn't queue a DMA buffer for some reason, but we
                 * have data pending to be sent.  Re-enable the TX IRQ.
                 */
-               uap->im |= UART011_TXIM;
-               writew(uap->im, uap->port.membase + UART011_IMSC);
-       }
+               pl011_start_tx_pio(uap);
+
        spin_unlock_irqrestore(&uap->port.lock, flags);
 }
 
                if (!uap->dmatx.queued) {
                        if (pl011_dma_tx_refill(uap) > 0) {
                                uap->im &= ~UART011_TXIM;
-                               ret = true;
-                       } else {
-                               uap->im |= UART011_TXIM;
+                               writew(uap->im, uap->port.membase +
+                                      UART011_IMSC);
+                       } else
                                ret = false;
-                       }
-                       writew(uap->im, uap->port.membase + UART011_IMSC);
                } else if (!(uap->dmacr & UART011_TXDMAE)) {
                        uap->dmacr |= UART011_TXDMAE;
                        writew(uap->dmacr,
        pl011_dma_tx_stop(uap);
 }
 
+static bool pl011_tx_chars(struct uart_amba_port *uap);
+
+/*
+ * Start TX with programmed I/O only (no DMA).
+ *
+ * Sets UART011_TXIM in the cached interrupt mask and writes it to IMSC to
+ * enable the TX IRQ.  While no TX interrupt has been observed yet
+ * (tx_irq_seen == 0), pl011_tx_chars() is also called directly rather than
+ * relying on an interrupt to start the transfer.
+ */
+static void pl011_start_tx_pio(struct uart_amba_port *uap)
+{
+       uap->im |= UART011_TXIM;
+       writew(uap->im, uap->port.membase + UART011_IMSC);
+       if (!uap->tx_irq_seen)
+               pl011_tx_chars(uap);
+}
+
+/*
+ * Start transmission on the port: let the DMA path try first and use
+ * programmed I/O when pl011_dma_tx_start() reports it did not start.
+ */
 static void pl011_start_tx(struct uart_port *port)
 {
        struct uart_amba_port *uap =
            container_of(port, struct uart_amba_port, port);
 
-       if (!pl011_dma_tx_start(uap)) {
-               uap->im |= UART011_TXIM;
-               writew(uap->im, uap->port.membase + UART011_IMSC);
-       }
+       /* Nothing more to do when the DMA path accepted the start request. */
+       if (pl011_dma_tx_start(uap))
+               return;
+
+       pl011_start_tx_pio(uap);
 }
 
 static void pl011_stop_rx(struct uart_port *port)
        spin_lock(&uap->port.lock);
 }
 
-static void pl011_tx_chars(struct uart_amba_port *uap)
+/*
+ * Transmit a character
+ * There must be at least one free entry in the TX FIFO to accept the char.
+ * The character is written to the data register and counted in icount.tx
+ * unconditionally, so it has been consumed whatever the return value is.
+ *
+ * Returns true if the FIFO might have space in it afterwards;
+ * returns false if the FIFO definitely became full.
+ *
+ * Once more than one TX interrupt has been seen (tx_irq_seen > 1), true is
+ * returned without reading the flag register at all.
+ */
+static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c)
+{
+       writew(c, uap->port.membase + UART01x_DR);
+       uap->port.icount.tx++;
+
+       if (likely(uap->tx_irq_seen > 1))
+               return true;
+
+       return !(readw(uap->port.membase + UART01x_FR) & UART01x_FR_TXFF);
+}
+
+/*
+ * Feed the TX FIFO from x_char and the circular transmit buffer.
+ *
+ * Until two TX interrupts have been seen the FIFO fill level is unknown, so
+ * up to fifosize characters are written with TXFF checked after each one;
+ * afterwards half a FIFO is written without polling.
+ *
+ * pl011_tx_char() always consumes the character it is given, even when it
+ * reports that the FIFO became full, so the buffer tail is advanced after
+ * every write and writing stops as soon as the FIFO is reported full.
+ *
+ * When characters remain and no TX interrupt has been seen yet, the delayed
+ * work tx_softirq_work is scheduled to continue the transfer.
+ *
+ * Always returns false.
+ */
+static bool pl011_tx_chars(struct uart_amba_port *uap)
 {
        struct circ_buf *xmit = &uap->port.state->xmit;
        int count;
 
+       if (unlikely(uap->tx_irq_seen < 2))
+               /*
+                * Initial FIFO fill level unknown: we must check TXFF
+                * after each write, so just try to fill up the FIFO.
+                */
+               count = uap->fifosize;
+       else /* tx_irq_seen >= 2 */
+               /*
+                * FIFO initially at least half-empty, so we can simply
+                * write half the FIFO without polling TXFF.
+                *
+                * Note: the *first* TX IRQ can still race with
+                * pl011_start_tx_pio(), which can result in the FIFO
+                * being fuller than expected in that case.
+                */
+               count = uap->fifosize >> 1;
+
+       /*
+        * If the FIFO is full we're guaranteed a TX IRQ at some later point,
+        * and can't transmit immediately in any case:
+        */
+       if (unlikely(uap->tx_irq_seen < 2 &&
+                    readw(uap->port.membase + UART01x_FR) & UART01x_FR_TXFF))
+               return false;
+
        if (uap->port.x_char) {
-               writew(uap->port.x_char, uap->port.membase + UART01x_DR);
-               uap->port.icount.tx++;
+               bool space_left = pl011_tx_char(uap, uap->port.x_char);
+
                uap->port.x_char = 0;
-               return;
+               /* Don't push buffer data into a FIFO that x_char just filled. */
+               count = space_left ? count - 1 : 0;
        }
        if (uart_circ_empty(xmit) || uart_tx_stopped(&uap->port)) {
                pl011_stop_tx(&uap->port);
-               return;
+               goto done;
        }
 
        /* If we are using DMA mode, try to send some characters. */
        if (pl011_dma_tx_irq(uap))
-               return;
+               goto done;
 
-       count = uap->fifosize >> 1;
-       do {
-               writew(xmit->buf[xmit->tail], uap->port.membase + UART01x_DR);
+       while (count-- > 0) {
+               /* The character is consumed even if it filled the FIFO. */
+               bool space_left = pl011_tx_char(uap, xmit->buf[xmit->tail]);
+
                xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-               uap->port.icount.tx++;
-               if (uart_circ_empty(xmit))
+               if (!space_left || uart_circ_empty(xmit))
                        break;
-       } while (--count > 0);
+       }
 
        if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
                uart_write_wakeup(&uap->port);
 
-       if (uart_circ_empty(xmit))
+       if (uart_circ_empty(xmit)) {
                pl011_stop_tx(&uap->port);
+               goto done;
+       }
+
+       if (unlikely(!uap->tx_irq_seen))
+               schedule_delayed_work(&uap->tx_softirq_work, uap->port.timeout);
+
+done:
+       return false;
 }
 
 static void pl011_modem_status(struct uart_amba_port *uap)
        wake_up_interruptible(&uap->port.state->port.delta_msr_wait);
 }
 
+/*
+ * Delayed-work handler that continues transmission while no TX interrupt
+ * has been seen; it is scheduled from pl011_tx_chars().  Despite the name
+ * it is run as delayed work (see INIT_DELAYED_WORK), not as a softirq.
+ *
+ * port.lock is taken with interrupts disabled in the other non-interrupt
+ * paths of this driver (spin_lock_irq()/spin_lock_irqsave()), so the same
+ * is done here: with plain spin_lock() a UART interrupt arriving on this
+ * CPU while the lock is held could spin on it forever.
+ * NOTE(review): assumes the interrupt handler takes port.lock - confirm.
+ */
+static void pl011_tx_softirq(struct work_struct *work)
+{
+       struct delayed_work *dwork = to_delayed_work(work);
+       struct uart_amba_port *uap =
+               container_of(dwork, struct uart_amba_port, tx_softirq_work);
+
+       spin_lock_irq(&uap->port.lock);
+       /* Repeat for as long as pl011_tx_chars() asks to be called again. */
+       while (pl011_tx_chars(uap))
+               ;
+       spin_unlock_irq(&uap->port.lock);
+}
+
+/*
+ * Record that a TX interrupt arrived.  tx_irq_seen counts 0 -> 1 -> 2 and
+ * then stays at 2; on the very first interrupt the pending delayed TX work
+ * is cancelled.
+ */
+static void pl011_tx_irq_seen(struct uart_amba_port *uap)
+{
+       /* Counter already saturated: nothing to update. */
+       if (likely(uap->tx_irq_seen > 1))
+               return;
+
+       /* The counter reads 1 right after the first TX IRQ only. */
+       if (++uap->tx_irq_seen == 1)
+               cancel_delayed_work(&uap->tx_softirq_work);
+}
+
 static irqreturn_t pl011_int(int irq, void *dev_id)
 {
        struct uart_amba_port *uap = dev_id;
                        if (status & (UART011_DSRMIS|UART011_DCDMIS|
                                      UART011_CTSMIS|UART011_RIMIS))
                                pl011_modem_status(uap);
-                       if (status & UART011_TXIS)
+                       if (status & UART011_TXIS) {
+                               pl011_tx_irq_seen(uap);
                                pl011_tx_chars(uap);
+                       }
 
                        if (pass_counter-- == 0)
                                break;
 {
        struct uart_amba_port *uap =
            container_of(port, struct uart_amba_port, port);
-       unsigned int cr, lcr_h, fbrd, ibrd;
+       unsigned int cr;
        int retval;
 
        retval = pl011_hwinit(port);
 
        writew(uap->vendor->ifls, uap->port.membase + UART011_IFLS);
 
-       /*
-        * Provoke TX FIFO interrupt into asserting. Taking care to preserve
-        * baud rate and data format specified by FBRD, IBRD and LCRH as the
-        * UART may already be in use as a console.
-        */
-       spin_lock_irq(&uap->port.lock);
-
-       fbrd = readw(uap->port.membase + UART011_FBRD);
-       ibrd = readw(uap->port.membase + UART011_IBRD);
-       lcr_h = readw(uap->port.membase + uap->lcrh_rx);
-
-       cr = UART01x_CR_UARTEN | UART011_CR_TXE | UART011_CR_LBE;
-       writew(cr, uap->port.membase + UART011_CR);
-       writew(0, uap->port.membase + UART011_FBRD);
-       writew(1, uap->port.membase + UART011_IBRD);
-       pl011_write_lcr_h(uap, 0);
-       writew(0, uap->port.membase + UART01x_DR);
-       while (readw(uap->port.membase + UART01x_FR) & UART01x_FR_BUSY)
-               barrier();
+       /* Assume that TX IRQ doesn't work until we see one: */
+       uap->tx_irq_seen = 0;
 
-       writew(fbrd, uap->port.membase + UART011_FBRD);
-       writew(ibrd, uap->port.membase + UART011_IBRD);
-       pl011_write_lcr_h(uap, lcr_h);
+       spin_lock_irq(&uap->port.lock);
 
        /* restore RTS and DTR */
        cr = uap->old_cr & (UART011_CR_RTS | UART011_CR_DTR);
            container_of(port, struct uart_amba_port, port);
        unsigned int cr;
 
+       cancel_delayed_work_sync(&uap->tx_softirq_work);
+
        /*
         * disable all interrupts
         */
        uap->port.ops = &amba_pl011_pops;
        uap->port.flags = UPF_BOOT_AUTOCONF;
        uap->port.line = i;
+       INIT_DELAYED_WORK(&uap->tx_softirq_work, pl011_tx_softirq);
        pl011_dma_probe(&dev->dev, uap);
 
        /* Ensure interrupts from this UART are masked and cleared */