Indicates the absolute minimum limit of bytes allowed to be
                queued on this network device transmit queue. Default value is
                0.
+
+What:          /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_thrs
+Date:          Jan 2024
+KernelVersion: 6.9
+Contact:       netdev@vger.kernel.org
+Description:
+               Tx completion stall detection threshold in ms. The kernel
+               will detect all stalls longer than this threshold, and may
+               also detect stalls longer than half of the threshold.
+
+What:          /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_cnt
+Date:          Jan 2024
+KernelVersion: 6.9
+Contact:       netdev@vger.kernel.org
+Description:
+               Number of detected Tx completion stalls.
+
+What:          /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_max
+Date:          Jan 2024
+KernelVersion: 6.9
+Contact:       netdev@vger.kernel.org
+Description:
+               Longest detected Tx completion stall. Write 0 to clear.
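
As an illustrative userspace sketch (not part of the patch), the three knobs above can be exercised as follows. The interface "eth0", queue "tx-0" and the 100 ms threshold are arbitrary example values; note that stall_thrs is in milliseconds, while stall_max is reported as stored, i.e. in jiffies, by the show handler added below.

/* Illustrative only; "eth0", "tx-0" and the 100 ms value are examples. */
#include <stdio.h>

#define BQL_DIR "/sys/class/net/eth0/queues/tx-0/byte_queue_limits/"

static int write_knob(const char *name, const char *val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), BQL_DIR "%s", name);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

static unsigned long read_knob(const char *name)
{
	unsigned long val = 0;
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), BQL_DIR "%s", name);
	f = fopen(path, "r");
	if (f) {
		if (fscanf(f, "%lu", &val) != 1)
			val = 0;
		fclose(f);
	}
	return val;
}

int main(void)
{
	write_knob("stall_thrs", "100\n");	/* arm detection at 100 ms */
	printf("stalls so far: %lu, longest: %lu jiffies\n",
	       read_knob("stall_cnt"), read_knob("stall_max"));
	write_knob("stall_max", "0\n");		/* clear the longest-stall record */
	return 0;
}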
 
 
 #ifdef __KERNEL__
 
+#include <linux/bitops.h>
 #include <asm/bug.h>
 
+#define DQL_HIST_LEN           4
+#define DQL_HIST_ENT(dql, idx) ((dql)->history[(idx) % DQL_HIST_LEN])
+
 struct dql {
        /* Fields accessed in enqueue path (dql_queued) */
        unsigned int    num_queued;             /* Total ever queued */
        unsigned int    adj_limit;              /* limit + num_completed */
        unsigned int    last_obj_cnt;           /* Count at last queuing */
 
+       unsigned long   history_head;           /* jiffies / BITS_PER_LONG */
+       /* stall entries, a bit per entry */
+       unsigned long   history[DQL_HIST_LEN];
+
        /* Fields accessed only by completion path (dql_completed) */
 
        unsigned int    limit ____cacheline_aligned_in_smp; /* Current limit */
        unsigned int    max_limit;              /* Max limit */
        unsigned int    min_limit;              /* Minimum limit */
        unsigned int    slack_hold_time;        /* Time to measure slack */
+
+       /* Stall threshold (in jiffies), defined by user */
+       unsigned short  stall_thrs;
+       /* Longest stall detected, reported to user */
+       unsigned short  stall_max;
+       unsigned long   last_reap;              /* Last reap (in jiffies) */
+       unsigned long   stall_cnt;              /* Number of stalls */
 };
 
 /* Set some static maximums */
  */
 static inline void dql_queued(struct dql *dql, unsigned int count)
 {
+       unsigned long map, now, now_hi, i;
+
        BUG_ON(count > DQL_MAX_OBJECT);
 
        dql->last_obj_cnt = count;
        barrier();
 
        dql->num_queued += count;
+
+       now = jiffies;
+       now_hi = now / BITS_PER_LONG;
+
+       /* The following code sets a bit in the ring buffer, where each
+        * bit tracks the time at which a packet was queued. The dql->history
+        * buffer covers DQL_HIST_LEN * BITS_PER_LONG time slots (in jiffies).
+        */
+       if (unlikely(now_hi != dql->history_head)) {
+               /* About to reuse slots, clear them */
+               for (i = 0; i < DQL_HIST_LEN; i++) {
+                       /* Multiplication masks high bits */
+                       if (now_hi * BITS_PER_LONG ==
+                           (dql->history_head + i) * BITS_PER_LONG)
+                               break;
+                       DQL_HIST_ENT(dql, dql->history_head + i + 1) = 0;
+               }
+               /* pairs with smp_rmb() in dql_check_stall() */
+               smp_wmb();
+               WRITE_ONCE(dql->history_head, now_hi);
+       }
+
+       /* __set_bit() does not guarantee WRITE_ONCE() semantics */
+       map = DQL_HIST_ENT(dql, now_hi);
+
+       /* Populate the history with an entry (bit) per queued */
+       if (!(map & BIT_MASK(now)))
+               WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now));
 }
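
As a quick illustration of the indexing above (a standalone userspace sketch, not kernel code, assuming 64-bit longs), a jiffies timestamp selects one of the DQL_HIST_LEN history words and one bit within it:

/* Standalone illustration of the history indexing; not kernel code. */
#include <stdio.h>

#define BITS_PER_LONG	64	/* assumes a 64-bit kernel */
#define DQL_HIST_LEN	4

int main(void)
{
	unsigned long jiffies = 100000;				/* example timestamp */
	unsigned long hi = jiffies / BITS_PER_LONG;		/* the "history_head" value */
	unsigned long slot = hi % DQL_HIST_LEN;			/* which history[] word */
	unsigned long bit = jiffies % BITS_PER_LONG;		/* which bit in that word */

	/* The ring covers DQL_HIST_LEN * BITS_PER_LONG = 256 jiffies; older
	 * slots are cleared and reused as history_head advances.
	 */
	printf("jiffies %lu -> history[%lu], bit %lu\n", jiffies, slot, bit);
	return 0;
}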
 
 /* Returns how many objects can be queued, < 0 indicates over limit. */
 
                  __entry->work, __entry->budget)
 );
 
+TRACE_EVENT(dql_stall_detected,
+
+       TP_PROTO(unsigned short thrs, unsigned int len,
+                unsigned long last_reap, unsigned long hist_head,
+                unsigned long now, unsigned long *hist),
+
+       TP_ARGS(thrs, len, last_reap, hist_head, now, hist),
+
+       TP_STRUCT__entry(
+               __field(        unsigned short,         thrs)
+               __field(        unsigned int,           len)
+               __field(        unsigned long,          last_reap)
+               __field(        unsigned long,          hist_head)
+               __field(        unsigned long,          now)
+               __array(        unsigned long,          hist, 4)
+       ),
+
+       TP_fast_assign(
+               __entry->thrs = thrs;
+               __entry->len = len;
+               __entry->last_reap = last_reap;
+               __entry->hist_head = hist_head * BITS_PER_LONG;
+               __entry->now = now;
+               memcpy(__entry->hist, hist, sizeof(__entry->hist));
+       ),
+
+       TP_printk("thrs %u  len %u  last_reap %lu  hist_head %lu  now %lu  hist %016lx %016lx %016lx %016lx",
+                 __entry->thrs, __entry->len,
+                 __entry->last_reap, __entry->hist_head, __entry->now,
+                 __entry->hist[0], __entry->hist[1],
+                 __entry->hist[2], __entry->hist[3])
+);
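
As a usage note (an illustrative sketch, not part of the patch), the new event can be enabled like any other tracepoint through tracefs; the path below assumes the common /sys/kernel/tracing mount point:

/* Illustrative sketch only; assumes tracefs mounted at /sys/kernel/tracing. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/tracing/events/napi/dql_stall_detected/enable", "w");

	if (!f)
		return 1;
	fputs("1\n", f);	/* start recording dql_stall_detected events */
	fclose(f);
	return 0;
}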
+
 #undef NO_DEV
 
 #endif /* _TRACE_NAPI_H */
 
 #include <linux/dynamic_queue_limits.h>
 #include <linux/compiler.h>
 #include <linux/export.h>
+#include <trace/events/napi.h>
 
 #define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
 #define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
 
+static void dql_check_stall(struct dql *dql)
+{
+       unsigned short stall_thrs;
+       unsigned long now;
+
+       stall_thrs = READ_ONCE(dql->stall_thrs);
+       if (!stall_thrs)
+               return;
+
+       now = jiffies;
+       /* Check for a potential stall */
+       if (time_after_eq(now, dql->last_reap + stall_thrs)) {
+               unsigned long hist_head, t, start, end;
+
+               /* We are trying to detect a period of at least @stall_thrs
+                * jiffies without any Tx completions, but during first half
+                * of which some Tx was posted.
+                */
+dqs_again:
+               hist_head = READ_ONCE(dql->history_head);
+               /* pairs with smp_wmb() in dql_queued() */
+               smp_rmb();
+
+               /* Get the previous entry in the ring buffer, which is the
+                * oldest sample.
+                */
+               start = (hist_head - DQL_HIST_LEN + 1) * BITS_PER_LONG;
+
+               /* Advance start to continue from the last reap time */
+               if (time_before(start, dql->last_reap + 1))
+                       start = dql->last_reap + 1;
+
+               /* Newest sample we should have already seen a completion for */
+               end = hist_head * BITS_PER_LONG + (BITS_PER_LONG - 1);
+
+               /* Shrink the search space to [start, (now - stall_thrs/2)] if
+                * `end` is beyond the stall zone
+                */
+               if (time_before(now, end + stall_thrs / 2))
+                       end = now - stall_thrs / 2;
+
+               /* Search for the queued time in [start, end] */
+               for (t = start; time_before_eq(t, end); t++)
+                       if (test_bit(t % (DQL_HIST_LEN * BITS_PER_LONG),
+                                    dql->history))
+                               break;
+
+               /* If t ran past end, nothing was queued during the stall
+                * window, so there is no stall
+                */
+               if (!time_before_eq(t, end))
+                       goto no_stall;
+
+               /* The ring buffer was modified in the meantime, retry */
+               if (hist_head != READ_ONCE(dql->history_head))
+                       goto dqs_again;
+
+               dql->stall_cnt++;
+               dql->stall_max = max_t(unsigned short, dql->stall_max, now - t);
+
+               trace_dql_stall_detected(dql->stall_thrs, now - t,
+                                        dql->last_reap, dql->history_head,
+                                        now, dql->history);
+       }
+no_stall:
+       dql->last_reap = now;
+}
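
To make the window arithmetic above concrete, here is a standalone sketch (userspace, example numbers only) that mirrors the [start, end] computation; with now = 1000, last_reap = 980 and a 16-jiffy threshold it yields the window [981, 992]:

/* Standalone illustration of the stall search window; example numbers only. */
#include <stdio.h>

#define BITS_PER_LONG	64
#define DQL_HIST_LEN	4

int main(void)
{
	unsigned long now = 1000, last_reap = 980, stall_thrs = 16;
	unsigned long hist_head = now / BITS_PER_LONG;
	unsigned long start, end;

	/* Oldest time slot still covered by the history ring */
	start = (hist_head - DQL_HIST_LEN + 1) * BITS_PER_LONG;
	/* Never look further back than the last completion */
	if (start < last_reap + 1)
		start = last_reap + 1;

	/* Newest slot old enough that a completion should have been seen */
	end = hist_head * BITS_PER_LONG + (BITS_PER_LONG - 1);
	if (now < end + stall_thrs / 2)
		end = now - stall_thrs / 2;

	/* A set bit anywhere in [start, end] means Tx was queued at least
	 * stall_thrs/2 jiffies ago with no completion since last_reap.
	 */
	printf("search window: [%lu, %lu]\n", start, end);
	return 0;
}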
+
 /* Records completed count and recalculates the queue limit */
 void dql_completed(struct dql *dql, unsigned int count)
 {
        dql->prev_last_obj_cnt = dql->last_obj_cnt;
        dql->num_completed = completed;
        dql->prev_num_queued = num_queued;
+
+       dql_check_stall(dql);
 }
 EXPORT_SYMBOL(dql_completed);
 
        dql->prev_ovlimit = 0;
        dql->lowest_slack = UINT_MAX;
        dql->slack_start_time = jiffies;
+
+       dql->last_reap = jiffies;
+       dql->history_head = jiffies / BITS_PER_LONG;
+       memset(dql->history, 0, sizeof(dql->history));
 }
 EXPORT_SYMBOL(dql_reset);
 
        dql->max_limit = DQL_MAX_LIMIT;
        dql->min_limit = 0;
        dql->slack_hold_time = hold_time;
+       dql->stall_thrs = 0;
        dql_reset(dql);
 }
 EXPORT_SYMBOL(dql_init);
 
        = __ATTR(hold_time, 0644,
                 bql_show_hold_time, bql_set_hold_time);
 
+static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf)
+{
+       struct dql *dql = &queue->dql;
+
+       return sprintf(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
+}
+
+static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
+                                 const char *buf, size_t len)
+{
+       struct dql *dql = &queue->dql;
+       unsigned int value;
+       int err;
+
+       err = kstrtouint(buf, 10, &value);
+       if (err < 0)
+               return err;
+
+       value = msecs_to_jiffies(value);
+       /* Accept 0 (disable) or a threshold between 4 jiffies and half of
+        * the history window (4 == DQL_HIST_LEN, i.e. 2 * BITS_PER_LONG jiffies)
+        */
+       if (value && (value < 4 || value > 4 / 2 * BITS_PER_LONG))
+               return -ERANGE;
+
+       if (!dql->stall_thrs && value)
+               dql->last_reap = jiffies;
+       /* Force last_reap to be live */
+       smp_wmb();
+       dql->stall_thrs = value;
+
+       return len;
+}
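
For reference, the -ERANGE bounds above translate into different millisecond ranges depending on CONFIG_HZ; a small illustrative calculation follows (HZ = 1000 and 64-bit longs are example assumptions):

/* Illustrative bound calculation only; HZ and BITS_PER_LONG are examples. */
#include <stdio.h>

#define HZ		1000	/* example; the real value is a kernel config */
#define BITS_PER_LONG	64

int main(void)
{
	unsigned int min_jiffies = 4;				/* lower bound of the check */
	unsigned int max_jiffies = 4 / 2 * BITS_PER_LONG;	/* half the history ring */

	/* With HZ=1000 this prints 4..128 ms; with HZ=250 it would be 16..512 ms */
	printf("accepted stall_thrs range: %u..%u ms\n",
	       min_jiffies * 1000 / HZ, max_jiffies * 1000 / HZ);
	return 0;
}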
+
+static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
+       __ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);
+
+static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf)
+{
+       return sprintf(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
+}
+
+static ssize_t bql_set_stall_max(struct netdev_queue *queue,
+                                const char *buf, size_t len)
+{
+       WRITE_ONCE(queue->dql.stall_max, 0);
+       return len;
+}
+
+static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
+       __ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);
+
+static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
+{
+       struct dql *dql = &queue->dql;
+
+       return sprintf(buf, "%lu\n", dql->stall_cnt);
+}
+
+static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
+       __ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);
+
 static ssize_t bql_show_inflight(struct netdev_queue *queue,
                                 char *buf)
 {
        &bql_limit_min_attribute.attr,
        &bql_hold_time_attribute.attr,
        &bql_inflight_attribute.attr,
+       &bql_stall_thrs_attribute.attr,
+       &bql_stall_cnt_attribute.attr,
+       &bql_stall_max_attribute.attr,
        NULL
 };