#include "xe_macros.h"
 #include "xe_map.h"
 #include "xe_mocs.h"
+#include "xe_module.h"
 #include "xe_ring_ops_types.h"
 #include "xe_sched_job.h"
 #include "xe_trace.h"
 #define ENGINE_STATE_SUSPENDED         (1 << 5)
 #define EXEC_QUEUE_STATE_RESET         (1 << 6)
 #define ENGINE_STATE_KILLED            (1 << 7)
+#define EXEC_QUEUE_STATE_WEDGED                (1 << 8)
 
 static void set_exec_queue_killed(struct xe_exec_queue *q)
 {
        atomic_or(ENGINE_STATE_KILLED, &q->guc->state);
 }
 
-static bool exec_queue_killed_or_banned(struct xe_exec_queue *q)
+static bool exec_queue_wedged(struct xe_exec_queue *q)
 {
-       return exec_queue_killed(q) || exec_queue_banned(q);
+       return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
+}
+
+static void set_exec_queue_wedged(struct xe_exec_queue *q)
+{
+       atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
+}
+
+static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
+{
+       return exec_queue_banned(q) || (atomic_read(&q->guc->state) &
+               (EXEC_QUEUE_STATE_WEDGED | ENGINE_STATE_KILLED));
 }
 
        free_submit_wq(guc);
 }
 
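+/*
+ * drmm cleanup action, registered from guc_submit_wedged(): drops the extra
+ * exec queue references taken when the device was declared wedged.
+ */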
+static void guc_submit_wedged_fini(struct drm_device *drm, void *arg)
+{
+       struct xe_guc *guc = arg;
+       struct xe_exec_queue *q;
+       unsigned long index;
+
+       xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+               if (exec_queue_wedged(q))
+                       xe_exec_queue_put(q);
+}
+
 static const struct xe_exec_queue_ops guc_exec_queue_ops;
 
 static void primelockdep(struct xe_guc *guc)
 
        trace_xe_sched_job_run(job);
 
-       if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) {
+       if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
                if (!exec_queue_registered(q))
                        register_engine(q);
                if (!lr)        /* LR jobs are emitted in the exec IOCTL */
                xe_sched_tdr_queue_imm(&q->guc->sched);
 }
 
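+/*
+ * Declare the device wedged: stop submission and GuC CT communication, and
+ * pin every exec queue with an extra reference so its state is preserved
+ * until driver unload.
+ */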
+static void guc_submit_wedged(struct xe_guc *guc)
+{
+       struct xe_exec_queue *q;
+       unsigned long index;
+       int err;
+
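+	/* Mark the device wedged, then stop submission and CT communication */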
+       xe_device_declare_wedged(guc_to_xe(guc));
+       xe_guc_submit_reset_prepare(guc);
+       xe_guc_ct_stop(&guc->ct);
+
+       err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm,
+                                      guc_submit_wedged_fini, guc);
+       if (err)
+               return;
+
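+	/*
+	 * Pin each queue with an extra reference, dropped again in
+	 * guc_submit_wedged_fini(), and mark it wedged.
+	 */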
+       mutex_lock(&guc->submission_state.lock);
+       xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+               if (xe_exec_queue_get_unless_zero(q))
+                       set_exec_queue_wedged(q);
+       mutex_unlock(&guc->submission_state.lock);
+}
+
 static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
 {
        struct xe_guc_exec_queue *ge =
        struct xe_guc *guc = exec_queue_to_guc(q);
        struct xe_device *xe = guc_to_xe(guc);
        struct xe_gpu_scheduler *sched = &ge->sched;
+       bool wedged = xe_device_wedged(xe);
 
        xe_assert(xe, xe_exec_queue_is_lr(q));
        trace_xe_exec_queue_lr_cleanup(q);
 
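+	/* wedged_mode 2 wedges the device on any hang */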
+       if (!wedged && xe_modparam.wedged_mode == 2) {
+               guc_submit_wedged(exec_queue_to_guc(q));
+               wedged = true;
+       }
+
        /* Kill the run_job / process_msg entry points */
        xe_sched_submission_stop(sched);
 
         * xe_guc_deregister_done_handler() which treats it as an unexpected
         * state.
         */
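+	/* When wedged, CT is stopped: skip the scheduling-disable handshake */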
-       if (exec_queue_registered(q) && !exec_queue_destroyed(q)) {
+       if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
                struct xe_guc *guc = exec_queue_to_guc(q);
                int ret;
 
        struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q));
        int err = -ETIME;
        int i = 0;
+       bool wedged = xe_device_wedged(xe);
 
        /*
         * TDR has fired before free job worker. Common if exec queue
 
        trace_xe_sched_job_timedout(job);
 
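+	/* In wedged_mode 2, the first job timeout wedges the entire device */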
+       if (!wedged && xe_modparam.wedged_mode == 2) {
+               guc_submit_wedged(exec_queue_to_guc(q));
+               wedged = true;
+       }
+
        /* Kill the run_job entry point */
        xe_sched_submission_stop(sched);
 
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset.
         */
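+	/* Once wedged, no GT reset is attempted; state is left for inspection */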
-       if (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
-           (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) {
+       if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
+                       (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
                if (!xe_sched_invalidate_job(job, 2)) {
                        xe_sched_add_pending_job(sched, job);
                        xe_sched_submission_start(sched);
        }
 
        /* Engine state now stable, disable scheduling if needed */
-       if (exec_queue_registered(q)) {
+       if (!wedged && exec_queue_registered(q)) {
                struct xe_guc *guc = exec_queue_to_guc(q);
                int ret;
 
         */
        xe_sched_add_pending_job(sched, job);
        xe_sched_submission_start(sched);
+
        xe_guc_exec_queue_trigger_cleanup(q);
 
        /* Mark all outstanding jobs as bad, thus completing them */
        INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
 
        /* We must block on kernel engines so slabs are empty on driver unload */
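+	/* Wedged queues drop their final reference at unload, block for them too */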
-       if (q->flags & EXEC_QUEUE_FLAG_PERMANENT)
+       if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
                __guc_exec_queue_fini_async(&q->guc->fini_async);
        else
                queue_work(system_wq, &q->guc->fini_async);
 
 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
 {
-       return !exec_queue_killed_or_banned(q) && exec_queue_registered(q);
+       return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
 }
 
 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
 {
        struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
 
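+	/* A wedged GuC cannot process CLEANUP messages; fini the queue directly */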
-       if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT))
+       if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
                guc_exec_queue_add_msg(q, msg, CLEANUP);
        else
                __guc_exec_queue_fini(exec_queue_to_guc(q), q);
 {
        struct xe_sched_msg *msg;
 
-       if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q))
+       if (q->sched_props.priority == priority ||
+           exec_queue_killed_or_banned_or_wedged(q))
                return 0;
 
        msg = kmalloc(sizeof(*msg), GFP_KERNEL);
        struct xe_sched_msg *msg;
 
        if (q->sched_props.timeslice_us == timeslice_us ||
-           exec_queue_killed_or_banned(q))
+           exec_queue_killed_or_banned_or_wedged(q))
                return 0;
 
        msg = kmalloc(sizeof(*msg), GFP_KERNEL);
        struct xe_sched_msg *msg;
 
        if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
-           exec_queue_killed_or_banned(q))
+           exec_queue_killed_or_banned_or_wedged(q))
                return 0;
 
        msg = kmalloc(sizeof(*msg), GFP_KERNEL);
 {
        struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
 
-       if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending)
+       if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending)
                return -EINVAL;
 
        q->guc->suspend_pending = true;
 {
        struct xe_gpu_scheduler *sched = &q->guc->sched;
 
-       if (!exec_queue_killed_or_banned(q)) {
+       if (!exec_queue_killed_or_banned_or_wedged(q)) {
                int i;
 
                trace_xe_exec_queue_resubmit(q);