* - UDP 9091 packets trigger TX reply
  * - TX HW timestamp is requested and reported back upon completion
  * - TX checksum is requested
+ * - TX launch time HW offload is requested for transmission
  */
 
 #include <test_progs.h>
 #include <time.h>
 #include <unistd.h>
 #include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/pkt_sched.h>
+#include <linux/pkt_cls.h>
+#include <linux/ethtool.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
 
 #include "xdp_metadata.h"
 
 bool skip_tx;
 __u64 last_hw_rx_timestamp;
 __u64 last_xdp_rx_timestamp;
+__u64 last_launch_time;
+__u64 launch_time_delta_to_hw_rx_timestamp;
+int launch_time_queue;
+
+#define run_command(cmd, ...)                                  \
+({                                                             \
+       char command[1024];                                     \
+       memset(command, 0, sizeof(command));                    \
+       snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
+       fprintf(stderr, "Running: %s\n", command);              \
+       system(command);                                        \
+})
 
 void test__fail(void) { /* for network_helpers.c */ }
 
        if (meta->completion.tx_timestamp) {
                __u64 ref_tstamp = gettime(clock_id);
 
+               if (launch_time_delta_to_hw_rx_timestamp) {
+                       print_tstamp_delta("HW Launch-time",
+                                          "HW TX-complete-time",
+                                          last_launch_time,
+                                          meta->completion.tx_timestamp);
+               }
                print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
                                   meta->completion.tx_timestamp, ref_tstamp);
                print_tstamp_delta("XDP RX-time", "User TX-complete-time",
               xsk, ntohs(udph->check), ntohs(want_csum),
               meta->request.csum_start, meta->request.csum_offset);
 
+       /* Set the value of launch time */
+       if (launch_time_delta_to_hw_rx_timestamp) {
+               meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
+               meta->request.launch_time = last_hw_rx_timestamp +
+                                           launch_time_delta_to_hw_rx_timestamp;
+               last_launch_time = meta->request.launch_time;
+               print_tstamp_delta("HW RX-time", "HW Launch-time",
+                                  last_hw_rx_timestamp,
+                                  meta->request.launch_time);
+       }
+
        memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
        tx_desc->options |= XDP_TX_METADATA;
        tx_desc->len = len;
        const struct xdp_desc *rx_desc;
        struct pollfd fds[rxq + 1];
        __u64 comp_addr;
+       __u64 deadline;
        __u64 addr;
        __u32 idx = 0;
        int ret;
                                        if (ret)
                                                printf("kick_tx ret=%d\n", ret);
 
-                                       for (int j = 0; j < 500; j++) {
+                                       /* wait 1 second + cover launch time */
+                                       deadline = gettime(clock_id) +
+                                                  NANOSEC_PER_SEC +
+                                                  launch_time_delta_to_hw_rx_timestamp;
+                                       while (true) {
                                                if (complete_tx(xsk, clock_id))
                                                        break;
+                                               if (gettime(clock_id) >= deadline)
+                                                       break;
                                                usleep(10);
                                        }
                                }
                "  -h    Display this help and exit\n\n"
                "  -m    Enable multi-buffer XDP for larger MTU\n"
                "  -r    Don't generate AF_XDP reply (rx metadata only)\n"
+               "  -l    Delta of launch time relative to HW RX-time in ns\n"
+               "        default: 0 ns (launch time request is disabled)\n"
+               "  -L    Tx Queue to be enabled with launch time offload\n"
+               "        default: 0 (Tx Queue 0)\n"
                "Generate test packets on the other machine with:\n"
                "  echo -n xdp | nc -u -q1 <dst_ip> 9091\n";
 
 {
        int opt;
 
-       while ((opt = getopt(argc, argv, "chmr")) != -1) {
+       while ((opt = getopt(argc, argv, "chmrl:L:")) != -1) {
                switch (opt) {
                case 'c':
                        bind_flags &= ~XDP_USE_NEED_WAKEUP;
                case 'r':
                        skip_tx = true;
                        break;
+               case 'l':
+                       launch_time_delta_to_hw_rx_timestamp = atoll(optarg);
+                       break;
+               case 'L':
+                       launch_time_queue = atoll(optarg);
+                       break;
                case '?':
                        if (isprint(optopt))
                                fprintf(stderr, "Unknown option: -%c\n", optopt);
                error(-1, errno, "Invalid interface name");
 }
 
+void clean_existing_configurations(void)
+{
+       /* Check and delete root qdisc if exists */
+       if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc mqprio 8001:'", ifname) == 0)
+               run_command("sudo tc qdisc del dev %s root", ifname);
+
+       /* Check and delete ingress qdisc if exists */
+       if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc ingress ffff:'", ifname) == 0)
+               run_command("sudo tc qdisc del dev %s ingress", ifname);
+
+       /* Check and delete ethtool filters if any exist */
+       if (run_command("sudo ethtool -n %s | grep -q 'Filter:'", ifname) == 0) {
+               run_command("sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 sudo ethtool -N %s delete >&2",
+                           ifname, ifname);
+       }
+}
+
+#define MAX_TC 16
+
 int main(int argc, char *argv[])
 {
        clockid_t clock_id = CLOCK_TAI;
+       struct bpf_program *prog;
        int server_fd = -1;
+       size_t map_len = 0;
+       size_t que_len = 0;
+       char *buf = NULL;
+       char *map = NULL;
+       char *que = NULL;
+       char *tmp = NULL;
+       int tc = 0;
        int ret;
        int i;
 
-       struct bpf_program *prog;
-
        read_args(argc, argv);
 
        rxq = rxq_num(ifname);
-
        printf("rxq: %d\n", rxq);
 
+       if (launch_time_queue >= rxq || launch_time_queue < 0)
+               error(1, 0, "Invalid launch_time_queue.");
+
+       clean_existing_configurations();
+       sleep(1);
+
+       /* Enable tx and rx hardware timestamping */
        hwtstamp_enable(ifname);
 
+       /* Prepare priority to traffic class map for tc-mqprio */
+       for (i = 0; i < MAX_TC; i++) {
+               if (i < rxq)
+                       tc = i;
+
+               if (asprintf(&buf, "%d ", tc) == -1) {
+                       printf("Failed to malloc buf for tc map.\n");
+                       goto free_mem;
+               }
+
+               map_len += strlen(buf);
+               tmp = realloc(map, map_len + 1);
+               if (!tmp) {
+                       printf("Failed to realloc tc map.\n");
+                       goto free_mem;
+               }
+               map = tmp;
+               strcat(map, buf);
+               free(buf);
+               buf = NULL;
+       }
+
+       /* Prepare traffic class to hardware queue map for tc-mqprio */
+       for (i = 0; i <= tc; i++) {
+               if (asprintf(&buf, "1@%d ", i) == -1) {
+                       printf("Failed to malloc buf for tc queues.\n");
+                       goto free_mem;
+               }
+
+               que_len += strlen(buf);
+               tmp = realloc(que, que_len + 1);
+               if (!tmp) {
+                       printf("Failed to realloc tc queues.\n");
+                       goto free_mem;
+               }
+               que = tmp;
+               strcat(que, buf);
+               free(buf);
+               buf = NULL;
+       }
+
+       /* Add mqprio qdisc */
+       run_command("sudo tc qdisc add dev %s handle 8001: parent root mqprio num_tc %d map %squeues %shw 0",
+                   ifname, tc + 1, map, que);
+
+       /* To test launch time, send UDP packet with VLAN priority 1 to port 9091 */
+       if (launch_time_delta_to_hw_rx_timestamp) {
+               /* Enable launch time hardware offload on launch_time_queue */
+               run_command("sudo tc qdisc replace dev %s parent 8001:%d etf offload clockid CLOCK_TAI delta 500000",
+                           ifname, launch_time_queue + 1);
+               sleep(1);
+
+               /* Route incoming packet with VLAN priority 1 into launch_time_queue */
+               if (run_command("sudo ethtool -N %s flow-type ether vlan 0x2000 vlan-mask 0x1FFF action %d",
+                               ifname, launch_time_queue)) {
+                       run_command("sudo tc qdisc add dev %s ingress", ifname);
+                       run_command("sudo tc filter add dev %s parent ffff: protocol 802.1Q flower vlan_prio 1 hw_tc %d",
+                                   ifname, launch_time_queue);
+               }
+
+               /* Enable VLAN tag stripping offload */
+               run_command("sudo ethtool -K %s rxvlan on", ifname);
+       }
+
        rx_xsk = malloc(sizeof(struct xsk) * rxq);
        if (!rx_xsk)
                error(1, ENOMEM, "malloc");
        cleanup();
        if (ret)
                error(1, -ret, "verify_metadata");
+
+       clean_existing_configurations();
+
+free_mem:
+       free(buf);
+       free(map);
+       free(que);
 }