vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT;
        }
 
-       if (unlikely(skb->no_fcs)) {
-               lflags |= cpu_to_le32(TX_BD_FLAGS_NO_CRC);
-               goto normal_tx;
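+       /* PTP TX timestamp request: reserve a timestamp slot (tx_avail)
+        * and record the PTP sequence ID so the completion path can query
+        * the timestamp from firmware.  On parse failure the slot is
+        * returned and the packet goes out without a hardware timestamp.
+        */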
+       if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+               struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+               if (ptp && ptp->tx_tstamp_en && !skb_is_gso(skb) &&
+                   atomic_dec_if_positive(&ptp->tx_avail) >= 0) {
+                       if (!bnxt_ptp_parse(skb, &ptp->tx_seqid)) {
+                               lflags |= cpu_to_le32(TX_BD_FLAGS_STAMP);
+                               skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+                       } else {
+                               atomic_inc(&bp->ptp_cfg->tx_avail);
+                       }
+               }
        }
 
-       if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh) {
+       if (unlikely(skb->no_fcs))
+               lflags |= cpu_to_le32(TX_BD_FLAGS_NO_CRC);
+
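+       /* Packets that set lflags (PTP timestamp request, no-FCS) are
+        * not sent through the push path.
+        */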
+       if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh &&
+           !lflags) {
                struct tx_push_buffer *tx_push_buf = txr->tx_push;
                struct tx_push_bd *tx_push = &tx_push_buf->push_bd;
                struct tx_bd_ext *tx_push1 = &tx_push->txbd2;
 
        netdev_tx_sent_queue(txq, skb->len);
 
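+       /* Take the software TX timestamp before ringing the doorbell. */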
+       skb_tx_timestamp(skb);
+
        /* Sync BD data before updating doorbell */
        wmb();
 
        return NETDEV_TX_OK;
 
 tx_dma_error:
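+       /* The packet is dropped, so release the reserved timestamp slot. */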
+       if (BNXT_TX_PTP_IS_SET(lflags))
+               atomic_inc(&bp->ptp_cfg->tx_avail);
+
        last_frag = i;
 
        /* start back at beginning and unmap skb */
 
        for (i = 0; i < nr_pkts; i++) {
                struct bnxt_sw_tx_bd *tx_buf;
+               bool compl_deferred = false;
                struct sk_buff *skb;
                int j, last;
 
                                skb_frag_size(&skb_shinfo(skb)->frags[j]),
                                PCI_DMA_TODEVICE);
                }
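+               /* On P5 chips the TX timestamp is retrieved by the PTP
+                * worker; defer freeing the skb until it completes.
+                */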
+               if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
+                       if (bp->flags & BNXT_FLAG_CHIP_P5) {
+                               if (!bnxt_get_tx_ts_p5(bp, skb))
+                                       compl_deferred = true;
+                               else
+                                       atomic_inc(&bp->ptp_cfg->tx_avail);
+                       }
+               }
 
 next_tx_int:
                cons = NEXT_TX(cons);
 
                tx_bytes += skb->len;
-               dev_kfree_skb_any(skb);
+               if (!compl_deferred)
+                       dev_kfree_skb_any(skb);
        }
 
        netdev_tx_completed_queue(txq, nr_pkts, tx_bytes);
 
 #include <linux/net_tstamp.h>
 #include <linux/timecounter.h>
 #include <linux/timekeeping.h>
+#include <linux/ptp_classify.h>
 #include "bnxt_hsi.h"
 #include "bnxt.h"
 #include "bnxt_ptp.h"
 
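+/* Extract the PTP sequence ID from the header of an outgoing PTP packet.
+ * The sequence ID is later passed to the firmware timestamp query so it
+ * can be matched to the right packet.
+ */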
+int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id)
+{
+       unsigned int ptp_class;
+       struct ptp_header *hdr;
+
+       ptp_class = ptp_classify_raw(skb);
+
+       switch (ptp_class & PTP_CLASS_VMASK) {
+       case PTP_CLASS_V1:
+       case PTP_CLASS_V2:
+               hdr = ptp_parse_header(skb, ptp_class);
+               if (!hdr)
+                       return -EINVAL;
+
+               *seq_id = ntohs(hdr->sequence_id);
+               return 0;
+       default:
+               return -ERANGE;
+       }
+}
+
 static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info,
                            const struct timespec64 *ts)
 {
        spin_unlock_bh(&ptp->ptp_lock);
 }
 
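+/* Read a PTP timestamp from the firmware.  For the TX path, the stored
+ * sequence ID and a timeout are supplied so the firmware can match the
+ * timestamp to the transmitted packet.
+ */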
+static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts)
+{
+       struct hwrm_port_ts_query_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_port_ts_query_input req = {0};
+       int rc;
+
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_TS_QUERY, -1, -1);
+       req.flags = cpu_to_le32(flags);
+       if ((flags & PORT_TS_QUERY_REQ_FLAGS_PATH) ==
+           PORT_TS_QUERY_REQ_FLAGS_PATH_TX) {
+               req.enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES);
+               req.ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid);
+               req.ts_req_timeout = cpu_to_le16(BNXT_PTP_QTS_TIMEOUT);
+       }
+       mutex_lock(&bp->hwrm_cmd_lock);
+       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       if (!rc)
+               *ts = le64_to_cpu(resp->ptp_msg_ts);
+       mutex_unlock(&bp->hwrm_cmd_lock);
+       return rc;
+}
+
 static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info,
                             struct timespec64 *ts,
                             struct ptp_system_timestamp *sts)
        return bnxt_refclk_read(ptp->bp, NULL);
 }
 
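+/* Retrieve the TX timestamp for the deferred skb from firmware, convert
+ * it with the timecounter and report it to the socket, then release the
+ * skb and the timestamp slot.
+ */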
+static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+       struct skb_shared_hwtstamps timestamp;
+       u64 ts = 0, ns = 0;
+       int rc;
+
+       rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_PATH_TX, &ts);
+       if (!rc) {
+               memset(&timestamp, 0, sizeof(timestamp));
+               spin_lock_bh(&ptp->ptp_lock);
+               ns = timecounter_cyc2time(&ptp->tc, ts);
+               spin_unlock_bh(&ptp->ptp_lock);
+               timestamp.hwtstamp = ns_to_ktime(ns);
+               skb_tstamp_tx(ptp->tx_skb, &timestamp);
+       } else {
+               netdev_err(bp->dev, "TS query for TX timer failed rc = %x\n",
+                          rc);
+       }
+
+       dev_kfree_skb_any(ptp->tx_skb);
+       ptp->tx_skb = NULL;
+       atomic_inc(&ptp->tx_avail);
+}
+
 static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info)
 {
        struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
                                                ptp_info);
+       unsigned long now = jiffies;
        struct bnxt *bp = ptp->bp;
 
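+       /* Complete any deferred TX timestamp before the periodic work. */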
+       if (ptp->tx_skb)
+               bnxt_stamp_tx_skb(bp, ptp->tx_skb);
+
+       if (!time_after_eq(now, ptp->next_period))
+               return ptp->next_period - now;
+
        bnxt_ptp_get_current_time(bp);
+       ptp->next_period = now + HZ;
        return HZ;
 }
 
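+/* Queue an skb for deferred TX timestamp completion by the PTP worker.
+ * Only one skb may be outstanding at a time.
+ */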
+int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+       if (ptp->tx_skb) {
+               netdev_err(bp->dev, "deferring skb:one SKB is still outstanding\n");
+               return -EBUSY;
+       }
+       ptp->tx_skb = skb;
+       ptp_schedule_worker(ptp->ptp_clock, 0);
+       return 0;
+}
+
 int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts)
 {
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
                ptp_clock_unregister(ptp->ptp_clock);
 
        ptp->ptp_clock = NULL;
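+       /* Drop any skb still waiting for a TX timestamp. */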
+       if (ptp->tx_skb) {
+               dev_kfree_skb_any(ptp->tx_skb);
+               ptp->tx_skb = NULL;
+       }
        bnxt_unmap_ptp_regs(bp);
 }