reply = (void *)skb->data;
        err = -be32_to_cpu(reply->error);
        if (err) {
-               if (err != -ENOSPC)
+               if (err == -ENOSPC) {
+                       if (!atomic_fetch_inc(&nn->ktls_no_space))
+                               nn_info(nn, "HW TLS table full\n");
+               } else {
                        nn_dp_warn(&nn->dp,
                                   "failed to add TLS, FW replied: %d\n", err);
+               }
                goto err_free_skb;
        }
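
The -ENOSPC branch logs at most once per device: atomic_fetch_inc() returns the counter's value before the increment, so only the first rejection sees zero and emits the nn_info() message, while every rejection still bumps the counter that ethtool later reports as "hw_tls_no_space". A minimal userspace analogue of that print-once pattern, using C11 atomics rather than the kernel's atomic_t (illustrative only, not driver code):

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint no_space_cnt;

static void record_no_space(void)
{
        /* atomic_fetch_add() returns the value *before* the increment */
        if (atomic_fetch_add(&no_space_cnt, 1) == 0)
                printf("HW TLS table full\n");
}

int main(void)
{
        for (int i = 0; i < 3; i++)
                record_no_space();      /* message prints only once */
        printf("rejections: %u\n", atomic_load(&no_space_cnt));
        return 0;
}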
 
 
 #ifndef _NFP_NET_H_
 #define _NFP_NET_H_
 
+#include <linux/atomic.h>
 #include <linux/interrupt.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
  * @hw_csum_tx_inner:   Counter of inner TX checksum offload requests
  * @tx_gather:     Counter of packets with Gather DMA
  * @tx_lso:        Counter of LSO packets sent
+ * @hw_tls_tx:     Counter of TLS packets sent with crypto offloaded to HW
+ * @tls_tx_fallback:   Counter of TLS packets sent which had to be encrypted
+ *                     by the fallback path because packets came out of order
+ * @tls_tx_no_fallback:        Counter of TLS packets not sent because the fallback
+ *                     path could not encrypt them
  * @tx_errors:     How many TX errors were encountered
  * @tx_busy:        How often was TX busy (no space)?
  * @rx_replace_buf_alloc_fail: Counter of RX buffer allocation failures
        u64 hw_csum_rx_inner_ok;
        u64 hw_csum_rx_complete;
 
+       u64 hw_csum_rx_error;
+       u64 rx_replace_buf_alloc_fail;
+
        struct nfp_net_tx_ring *xdp_ring;
 
        struct u64_stats_sync tx_sync;
        u64 tx_pkts;
        u64 tx_bytes;
-       u64 hw_csum_tx;
+
+       u64 ____cacheline_aligned_in_smp hw_csum_tx;
        u64 hw_csum_tx_inner;
        u64 tx_gather;
        u64 tx_lso;
+       u64 hw_tls_tx;
 
-       u64 hw_csum_rx_error;
-       u64 rx_replace_buf_alloc_fail;
+       u64 tls_tx_fallback;
+       u64 tls_tx_no_fallback;
        u64 tx_errors;
        u64 tx_busy;
 
+       /* Cold data follows */
+
        u32 irq_vector;
        irq_handler_t handler;
        char name[IFNAMSIZ + 8];
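
The relayout above keeps the RX-path counters in one group, starts the TX-path counters on a fresh cache line via ____cacheline_aligned_in_smp, and moves the rarely written IRQ bookkeeping behind the "Cold data follows" marker, presumably so the RX and TX hot paths stop sharing a cache line. A compressed sketch of the resulting shape (illustrative field names, not the real struct nfp_net_r_vector):

#include <linux/cache.h>
#include <linux/types.h>

struct example_r_vector {
        /* hot RX-path counters, written from the RX handler */
        u64 rx_pkts;
        u64 hw_csum_rx_error;
        u64 rx_replace_buf_alloc_fail;

        /* hot TX-path counters, forced onto their own cache line */
        u64 ____cacheline_aligned_in_smp hw_csum_tx;
        u64 hw_tls_tx;
        u64 tls_tx_fallback;

        /* Cold data follows */
        u32 irq_vector;
        char name[32];
};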
  * @rx_bar:             Pointer to mapped FL/RX queues
  * @tlv_caps:          Parsed TLV capabilities
  * @ktls_tx_conn_cnt:  Number of offloaded kTLS TX connections
+ * @ktls_no_space:     Counter of kTLS connections rejected by the firmware
+ *                     due to lack of space
  * @mbox_cmsg:         Common Control Message via vNIC mailbox state
  * @mbox_cmsg.queue:   CCM mbox queue of pending messages
  * @mbox_cmsg.wq:      CCM mbox wait queue of waiting processes
 
        unsigned int ktls_tx_conn_cnt;
 
+       atomic_t ktls_no_space;
+
        struct {
                struct sk_buff_head queue;
                wait_queue_head_t wq;
 
 
 #ifdef CONFIG_TLS_DEVICE
 static struct sk_buff *
-nfp_net_tls_tx(struct nfp_net_dp *dp, struct sk_buff *skb, u64 *tls_handle,
-              int *nr_frags)
+nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
+              struct sk_buff *skb, u64 *tls_handle, int *nr_frags)
 {
        struct nfp_net_tls_offload_ctx *ntls;
        struct sk_buff *nskb;
                if (!datalen)
                        return skb;
 
+               u64_stats_update_begin(&r_vec->tx_sync);
+               r_vec->tls_tx_fallback++;
+               u64_stats_update_end(&r_vec->tx_sync);
+
                nskb = tls_encrypt_skb(skb);
-               if (!nskb)
+               if (!nskb) {
+                       u64_stats_update_begin(&r_vec->tx_sync);
+                       r_vec->tls_tx_no_fallback++;
+                       u64_stats_update_end(&r_vec->tx_sync);
                        return NULL;
+               }
                /* encryption wasn't necessary */
                if (nskb == skb)
                        return skb;
                /* we don't re-check ring space */
                if (unlikely(skb_is_nonlinear(nskb))) {
                        nn_dp_warn(dp, "tls_encrypt_skb() produced fragmented frame\n");
+                       u64_stats_update_begin(&r_vec->tx_sync);
+                       r_vec->tx_errors++;
+                       u64_stats_update_end(&r_vec->tx_sync);
                        dev_kfree_skb_any(nskb);
                        return NULL;
                }
                return nskb;
        }
 
+       if (datalen) {
+               u64_stats_update_begin(&r_vec->tx_sync);
+               r_vec->hw_tls_tx++;
+               u64_stats_update_end(&r_vec->tx_sync);
+       }
+
        memcpy(tls_handle, ntls->fw_handle, sizeof(ntls->fw_handle));
        ntls->next_seq += datalen;
        return skb;
        }
 
 #ifdef CONFIG_TLS_DEVICE
-       skb = nfp_net_tls_tx(dp, skb, &tls_handle, &nr_frags);
-       if (unlikely(!skb))
-               goto err_flush;
+       skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags);
+       if (unlikely(!skb)) {
+               nfp_net_tx_xmit_more_flush(tx_ring);
+               return NETDEV_TX_OK;
+       }
 #endif
 
        md_bytes = nfp_net_prep_tx_meta(skb, tls_handle);
 
 
 #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
 #define NN_ET_SWITCH_STATS_LEN 9
-#define NN_RVEC_GATHER_STATS   9
+#define NN_RVEC_GATHER_STATS   12
 #define NN_RVEC_PER_Q_STATS    3
+#define NN_CTRL_PATH_STATS     1
 
 #define SFP_SFF_REV_COMPLIANCE 1
 
 {
        struct nfp_net *nn = netdev_priv(netdev);
 
-       return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS;
+       return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS +
+               NN_CTRL_PATH_STATS;
 }
 
 static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
        data = nfp_pr_et(data, "hw_tx_inner_csum");
        data = nfp_pr_et(data, "tx_gather");
        data = nfp_pr_et(data, "tx_lso");
+       data = nfp_pr_et(data, "tx_tls_encrypted");
+       data = nfp_pr_et(data, "tx_tls_ooo");
+       data = nfp_pr_et(data, "tx_tls_drop_no_sync_data");
+
+       data = nfp_pr_et(data, "hw_tls_no_space");
 
        return data;
 }
                        tmp[6] = nn->r_vecs[i].hw_csum_tx_inner;
                        tmp[7] = nn->r_vecs[i].tx_gather;
                        tmp[8] = nn->r_vecs[i].tx_lso;
+                       tmp[9] = nn->r_vecs[i].hw_tls_tx;
+                       tmp[10] = nn->r_vecs[i].tls_tx_fallback;
+                       tmp[11] = nn->r_vecs[i].tls_tx_no_fallback;
                } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start));
 
                data += NN_RVEC_PER_Q_STATS;
        for (j = 0; j < NN_RVEC_GATHER_STATS; j++)
                *data++ = gathered_stats[j];
 
+       *data++ = atomic_read(&nn->ktls_no_space);
+
        return data;
 }
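
Every per-ring counter added here is written under the ring's u64_stats_sync (tx_sync) on the transmit path and read back in the ethtool loop above with the fetch_begin/fetch_retry pair, which is essentially a no-op on 64-bit kernels and only does real work on 32-bit where a 64-bit counter cannot be loaded atomically; the control-path ktls_no_space counter is a plain atomic_t read with atomic_read() instead. A self-contained sketch of the writer/reader pairing (kernel context assumed; the foo_* names are illustrative):

#include <linux/types.h>
#include <linux/u64_stats_sync.h>

struct foo_stats {
        struct u64_stats_sync syncp;
        u64 hw_tls_tx;
        u64 tls_tx_fallback;
};

static void foo_stats_init(struct foo_stats *s)
{
        u64_stats_init(&s->syncp);      /* the seqcount only exists on 32-bit */
}

/* writer side, e.g. the TX path incrementing a counter */
static void foo_count_hw_tls_tx(struct foo_stats *s)
{
        u64_stats_update_begin(&s->syncp);
        s->hw_tls_tx++;
        u64_stats_update_end(&s->syncp);
}

/* reader side, e.g. ethtool -S; retries if a writer raced with the read */
static void foo_read(struct foo_stats *s, u64 *tls_tx, u64 *fallback)
{
        unsigned int start;

        do {
                start = u64_stats_fetch_begin(&s->syncp);
                *tls_tx = s->hw_tls_tx;
                *fallback = s->tls_tx_fallback;
        } while (u64_stats_fetch_retry(&s->syncp, start));
}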