return 0;
 }
 
-static inline bool nicvf_xdp_rx(struct nicvf *nic,
-                               struct bpf_prog *prog,
-                               struct cqe_rx_t *cqe_rx)
+static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
+                               struct cqe_rx_t *cqe_rx, struct snd_queue *sq)
 {
        struct xdp_buff xdp;
        struct page *page;
 
        switch (action) {
        case XDP_PASS:
-       case XDP_TX:
-               /* Pass on all packets to network stack */
+               /* Pass on packet to network stack */
                return false;
+       case XDP_TX:
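+               /* Queue the frame on this Rx queue's XDP Tx SQ; the
+                * doorbell is rung later from CQ processing.
+                */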
+               nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
+               return true;
        default:
                bpf_warn_invalid_xdp_action(action);
        case XDP_ABORTED:
                                  unsigned int *tx_pkts, unsigned int *tx_bytes)
 {
        struct sk_buff *skb = NULL;
+       struct page *page;
        struct nicvf *nic = netdev_priv(netdev);
        struct snd_queue *sq;
        struct sq_hdr_subdesc *hdr;
        if (cqe_tx->send_status)
                nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx);
 
+       /* Is this an XDP-designated Tx queue? */
+       if (sq->is_xdp) {
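+               /* XDP Tx SQs track page pointers instead of skbs */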
+               page = (struct page *)sq->xdp_page[cqe_tx->sqe_ptr];
+               /* Check if it's a recycled page; if not, unmap the DMA mapping */
+               if (page && (page_ref_count(page) == 1))
+                       nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
+                                                hdr->subdesc_cnt);
+
+               /* Release page reference for recycling */
+               if (page)
+                       put_page(page);
+               sq->xdp_page[cqe_tx->sqe_ptr] = (u64)NULL;
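+               /* hdr->subdesc_cnt excludes the header itself, so add 1 */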
+               *subdesc_cnt += hdr->subdesc_cnt + 1;
+               return;
+       }
+
        skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
        if (skb) {
                /* Check for dummy descriptor used for HW TSO offload on 88xx */
 
 static void nicvf_rcv_pkt_handler(struct net_device *netdev,
                                  struct napi_struct *napi,
-                                 struct cqe_rx_t *cqe_rx)
+                                 struct cqe_rx_t *cqe_rx, struct snd_queue *sq)
 {
        struct sk_buff *skb;
        struct nicvf *nic = netdev_priv(netdev);
 
        /* For XDP, ignore pkts spanning multiple pages */
        if (nic->xdp_prog && (cqe_rx->rb_cnt == 1))
-               if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx))
+               if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq))
                        return;
 
        skb = nicvf_get_rcv_skb(snic, cqe_rx, nic->xdp_prog ? true : false);
        struct cmp_queue *cq = &qs->cq[cq_idx];
        struct cqe_rx_t *cq_desc;
        struct netdev_queue *txq;
-       struct snd_queue *sq;
-       unsigned int tx_pkts = 0, tx_bytes = 0;
+       struct snd_queue *sq = &qs->sq[cq_idx];
+       unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx;
 
        spin_lock_bh(&cq->lock);
 loop:
 
                switch (cq_desc->cqe_type) {
                case CQE_TYPE_RX:
-                       nicvf_rcv_pkt_handler(netdev, napi, cq_desc);
+                       nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq);
                        work_done++;
                break;
                case CQE_TYPE_SEND:
                goto loop;
 
 done:
-       sq = &nic->qs->sq[cq_idx];
        /* Update SQ's descriptor free count */
        if (subdesc_cnt)
                nicvf_put_sq_desc(sq, subdesc_cnt);
 
+       txq_idx = nicvf_netdev_qidx(nic, cq_idx);
+       /* Handle XDP Tx queues: for a CQ backing an XDP Tx queue, just
+        * ring the doorbell for frames queued by XDP_TX and skip the
+        * stack TXQ handling below.
+        */
+       if (nic->pnicvf->xdp_prog) {
+               if (txq_idx < nic->pnicvf->xdp_tx_queues) {
+                       nicvf_xdp_sq_doorbell(nic, sq, cq_idx);
+                       goto out;
+               }
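+               /* Remaining Tx queues belong to the network stack and are
+                * handled via the primary VF's netdev.
+                */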
+               nic = nic->pnicvf;
+               txq_idx -= nic->pnicvf->xdp_tx_queues;
+       }
+
        /* Wakeup TXQ if its stopped earlier due to SQ full */
        if (tx_done ||
            (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) {
                netdev = nic->pnicvf->netdev;
-               txq = netdev_get_tx_queue(netdev,
-                                         nicvf_netdev_qidx(nic, cq_idx));
+               txq = netdev_get_tx_queue(netdev, txq_idx);
                if (tx_pkts)
                        netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
 
                        if (netif_msg_tx_err(nic))
                                netdev_warn(netdev,
                                            "%s: Transmit queue wakeup SQ%d\n",
-                                           netdev->name, cq_idx);
+                                           netdev->name, txq_idx);
                }
        }
 
+out:
        spin_unlock_bh(&cq->lock);
        return work_done;
 }
                return NETDEV_TX_OK;
        }
 
+       /* In the XDP case, the initial HW Tx queues are used for XDP,
+        * but the stack's queue mapping starts at '0', so skip the
+        * Tx queues attached to Rx queues for XDP.
+        */
+       if (nic->xdp_prog)
+               qid += nic->xdp_tx_queues;
+
        snic = nic;
        /* Get secondary Qset's SQ structure */
        if (qid >= MAX_SND_QUEUES_PER_QS) {
 
 #include "q_struct.h"
 #include "nicvf_queues.h"
 
+static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
+                                              int size, u64 data);
 static void nicvf_get_page(struct nicvf *nic)
 {
        if (!nic->rb_pageref || !nic->rb_page)
 
 /* Initialize transmit queue */
 static int nicvf_init_snd_queue(struct nicvf *nic,
-                               struct snd_queue *sq, int q_len)
+                               struct snd_queue *sq, int q_len, int qidx)
 {
        int err;
 
        sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
        if (!sq->skbuff)
                return -ENOMEM;
+
        sq->head = 0;
        sq->tail = 0;
-       atomic_set(&sq->free_cnt, q_len - 1);
        sq->thresh = SND_QUEUE_THRESH;
 
-       /* Preallocate memory for TSO segment's header */
-       sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev,
-                                         q_len * TSO_HEADER_SIZE,
-                                         &sq->tso_hdrs_phys, GFP_KERNEL);
-       if (!sq->tso_hdrs)
-               return -ENOMEM;
+       /* Check if this SQ is an XDP Tx queue; for secondary Qsets,
+        * convert the local queue index to its global index first.
+        */
+       if (nic->sqs_mode)
+               qidx += ((nic->sqs_id + 1) * MAX_SND_QUEUES_PER_QS);
+       if (qidx < nic->pnicvf->xdp_tx_queues) {
+               /* Alloc memory to save page pointers for XDP_TX */
+               sq->xdp_page = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
+               if (!sq->xdp_page)
+                       return -ENOMEM;
+               sq->xdp_desc_cnt = 0;
+               sq->xdp_free_cnt = q_len - 1;
+               sq->is_xdp = true;
+       } else {
+               sq->xdp_page = NULL;
+               sq->xdp_desc_cnt = 0;
+               sq->xdp_free_cnt = 0;
+               sq->is_xdp = false;
+
+               atomic_set(&sq->free_cnt, q_len - 1);
+
+               /* Preallocate memory for TSO segment's header */
+               sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev,
+                                                 q_len * TSO_HEADER_SIZE,
+                                                 &sq->tso_hdrs_phys,
+                                                 GFP_KERNEL);
+               if (!sq->tso_hdrs)
+                       return -ENOMEM;
+       }
 
        return 0;
 }
 static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
 {
        struct sk_buff *skb;
+       struct page *page;
        struct sq_hdr_subdesc *hdr;
        struct sq_hdr_subdesc *tso_sqe;
 
        smp_rmb();
        while (sq->head != sq->tail) {
                skb = (struct sk_buff *)sq->skbuff[sq->head];
-               if (!skb)
+               if (!skb || !sq->xdp_page)
+                       goto next;
+
+               page = (struct page *)sq->xdp_page[sq->head];
+               if (!page)
                        goto next;
+
+               put_page(page);
+
                hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
                /* Check for dummy descriptor used for HW TSO offload on 88xx */
                if (hdr->dont_send) {
                        nicvf_unmap_sndq_buffers(nic, sq, sq->head,
                                                 hdr->subdesc_cnt);
                }
-               dev_kfree_skb_any(skb);
+               if (skb)
+                       dev_kfree_skb_any(skb);
 next:
                sq->head++;
                sq->head &= (sq->dmem.q_len - 1);
        }
        kfree(sq->skbuff);
+       kfree(sq->xdp_page);
        nicvf_free_q_desc_mem(nic, &sq->dmem);
 }
 
 
        /* Alloc send queue */
        for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
-               if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len))
+               if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx))
                        goto alloc_fail;
        }
 
        int qentry;
 
        qentry = sq->tail;
-       atomic_sub(desc_cnt, &sq->free_cnt);
+       if (!sq->is_xdp)
+               atomic_sub(desc_cnt, &sq->free_cnt);
+       else
+               sq->xdp_free_cnt -= desc_cnt;
        sq->tail += desc_cnt;
        sq->tail &= (sq->dmem.q_len - 1);
 
 /* Free descriptor back to SQ for future use */
 void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
 {
-       atomic_add(desc_cnt, &sq->free_cnt);
+       if (!sq->is_xdp)
+               atomic_add(desc_cnt, &sq->free_cnt);
+       else
+               sq->xdp_free_cnt += desc_cnt;
        sq->head += desc_cnt;
        sq->head &= (sq->dmem.q_len - 1);
 }
        }
 }
 
+/* XDP Transmit APIs */
+void nicvf_xdp_sq_doorbell(struct nicvf *nic,
+                          struct snd_queue *sq, int sq_num)
+{
+       if (!sq->xdp_desc_cnt)
+               return;
+
+       /* make sure all memory stores are done before ringing doorbell */
+       wmb();
+
+       /* Inform HW to xmit all queued XDP frames */
+       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
+                             sq_num, sq->xdp_desc_cnt);
+       sq->xdp_desc_cnt = 0;
+}
+
+static inline void
+nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
+                            int subdesc_cnt, u64 data, int len)
+{
+       struct sq_hdr_subdesc *hdr;
+
+       hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
+       memset(hdr, 0, SND_QUEUE_DESC_SIZE);
+       hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
+       hdr->subdesc_cnt = subdesc_cnt;
+       hdr->tot_len = len;
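+       /* Request a completion CQE for this frame */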
+       hdr->post_cqe = 1;
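+       /* Save the page so Tx completion can recycle or unmap it */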
+       sq->xdp_page[qentry] = (u64)virt_to_page((void *)data);
+}
+
+int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
+                           u64 bufaddr, u64 dma_addr, u16 len)
+{
+       int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
+       int qentry;
+
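+       /* Not enough free descriptors, can't transmit this frame */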
+       if (subdesc_cnt > sq->xdp_free_cnt)
+               return 0;
+
+       qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
+
+       nicvf_xdp_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, bufaddr, len);
+
+       qentry = nicvf_get_nxt_sqentry(sq, qentry);
+       nicvf_sq_add_gather_subdesc(sq, qentry, len, dma_addr);
+
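+       /* Doorbell is rung later from CQ processing via nicvf_xdp_sq_doorbell() */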
+       sq->xdp_desc_cnt += subdesc_cnt;
+
+       return 1;
+}
+
 /* Calculate no of SQ subdescriptors needed to transmit all
  * segments of this TSO packet.
  * Taken from 'Tilera network driver' with a minor modification.