 #include "changk104.h"
 #include "user.h"
 
+#include <core/client.h>
 #include <core/gpuobj.h>
+#include <subdev/bar.h>
+#include <subdev/fault.h>
+#include <subdev/top.h>
+#include <subdev/timer.h>
+#include <engine/sw.h>
 
 #include <nvif/class.h>
 
        .cgrp_force = true,
 };
 
+static void
+tu102_fifo_recover_work(struct work_struct *w)
+{
+       struct gk104_fifo *fifo = container_of(w, typeof(*fifo), recover.work);
+       struct nvkm_device *device = fifo->base.engine.subdev.device;
+       struct nvkm_engine *engine;
+       unsigned long flags;
+       u32 engm, runm, todo;
+       int engn, runl;
+
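+       /* Atomically take ownership of the pending engine/runlist masks. */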
+       spin_lock_irqsave(&fifo->base.lock, flags);
+       runm = fifo->recover.runm;
+       engm = fifo->recover.engm;
+       fifo->recover.engm = 0;
+       fifo->recover.runm = 0;
+       spin_unlock_irqrestore(&fifo->base.lock, flags);
+
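+       /* Block the affected runlists so channel assignments can't change
+        * while the engines are reset.
+        */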
+       nvkm_mask(device, 0x002630, runm, runm);
+
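+       /* Reset each engine that was scheduled for recovery. */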
+       for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT(engn)) {
+               if ((engine = fifo->engine[engn].engine)) {
+                       nvkm_subdev_fini(&engine->subdev, false);
+                       WARN_ON(nvkm_subdev_init(&engine->subdev));
+               }
+       }
+
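+       /* Resubmit the runlists, minus the channels that were killed. */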
+       for (todo = runm; runl = __ffs(todo), todo; todo &= ~BIT(runl))
+               gk104_fifo_runlist_update(fifo, runl);
+
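+       /* Acknowledge recovery for the runlists (0x00262c, presumably) and
+        * unblock them.
+        */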
+       nvkm_wr32(device, 0x00262c, runm);
+       nvkm_mask(device, 0x002630, runm, 0x00000000);
+}
+
+static void tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn);
+
+static void
+tu102_fifo_recover_runl(struct gk104_fifo *fifo, int runl)
+{
+       struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+       struct nvkm_device *device = subdev->device;
+       const u32 runm = BIT(runl);
+
+       assert_spin_locked(&fifo->base.lock);
+       if (fifo->recover.runm & runm)
+               return;
+       fifo->recover.runm |= runm;
+
+       /* Block runlist to prevent channel assignment(s) from changing. */
+       nvkm_mask(device, 0x002630, runm, runm);
+
+       /* Schedule recovery. */
+       nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl);
+       schedule_work(&fifo->recover.work);
+}
+
+static struct gk104_fifo_chan *
+tu102_fifo_recover_chid(struct gk104_fifo *fifo, int runl, int chid)
+{
+       struct gk104_fifo_chan *chan;
+       struct nvkm_fifo_cgrp *cgrp;
+
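+       /* First, look for the channel directly on the runlist. */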
+       list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
+               if (chan->base.chid == chid) {
+                       list_del_init(&chan->head);
+                       return chan;
+               }
+       }
+
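+       /* Otherwise, the id may name a channel group; detach and return the
+        * group's first channel.
+        */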
+       list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) {
+               if (cgrp->id == chid) {
+                       chan = list_first_entry(&cgrp->chan, typeof(*chan), head);
+                       list_del_init(&chan->head);
+                       if (!--cgrp->chan_nr)
+                               list_del_init(&cgrp->head);
+                       return chan;
+               }
+       }
+
+       return NULL;
+}
+
+static void
+tu102_fifo_recover_chan(struct nvkm_fifo *base, int chid)
+{
+       struct gk104_fifo *fifo = gk104_fifo(base);
+       struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+       struct nvkm_device *device = subdev->device;
+       const u32  stat = nvkm_rd32(device, 0x800004 + (chid * 0x08));
+       const u32  runl = (stat & 0x000f0000) >> 16;
+       const bool used = (stat & 0x00000001);
+       unsigned long engn, engm = fifo->runlist[runl].engm;
+       struct gk104_fifo_chan *chan;
+
+       assert_spin_locked(&fifo->base.lock);
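+       /* Nothing to do if the channel isn't marked as in-use. */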
+       if (!used)
+               return;
+
+       /* Lookup SW state for channel, and mark it as dead. */
+       chan = tu102_fifo_recover_chid(fifo, runl, chid);
+       if (chan) {
+               chan->killed = true;
+               nvkm_fifo_kevent(&fifo->base, chid);
+       }
+
+       /* Disable channel. */
+       nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800);
+       nvkm_warn(subdev, "channel %d: killed\n", chid);
+
+       /* Block channel assignments from changing during recovery. */
+       tu102_fifo_recover_runl(fifo, runl);
+
+       /* Schedule recovery for any engines the channel is on. */
+       for_each_set_bit(engn, &engm, fifo->engine_nr) {
+               struct gk104_fifo_engine_status status;
+
+               gk104_fifo_engine_status(fifo, engn, &status);
+               if (!status.chan || status.chan->id != chid)
+                       continue;
+               tu102_fifo_recover_engn(fifo, engn);
+       }
+}
+
+static void
+tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
+{
+       struct nvkm_engine *engine = fifo->engine[engn].engine;
+       struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+       struct nvkm_device *device = subdev->device;
+       const u32 runl = fifo->engine[engn].runl;
+       const u32 engm = BIT(engn);
+       struct gk104_fifo_engine_status status;
+       int mmui = -1;
+
+       assert_spin_locked(&fifo->base.lock);
+       if (fifo->recover.engm & engm)
+               return;
+       fifo->recover.engm |= engm;
+
+       /* Block channel assignments from changing during recovery. */
+       tu102_fifo_recover_runl(fifo, runl);
+
+       /* Determine which channel (if any) is currently on the engine. */
+       gk104_fifo_engine_status(fifo, engn, &status);
+       if (status.chan) {
+               /* The channel is no longer viable, kill it. */
+               tu102_fifo_recover_chan(&fifo->base, status.chan->id);
+       }
+
+       /* Determine MMU fault ID for the engine, if we're not being
+        * called from the fault handler already.
+        */
+       if (!status.faulted && engine) {
+               mmui = nvkm_top_fault_id(device, engine->subdev.index);
+               if (mmui < 0) {
+                       const struct nvkm_enum *en = fifo->func->fault.engine;
+
+                       for (; en && en->name; en++) {
+                               if (en->data2 == engine->subdev.index) {
+                                       mmui = en->value;
+                                       break;
+                               }
+                       }
+               }
+               WARN_ON(mmui < 0);
+       }
+
+       /* Trigger a MMU fault for the engine.
+        *
+        * It's unclear why this is needed, but nvgpu does something similar,
+        * and it makes recovery from CTXSW_TIMEOUT a lot more reliable.
+        */
+       if (mmui >= 0) {
+               nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000100 | mmui);
+
+               /* Wait for fault to trigger. */
+               nvkm_msec(device, 2000,
+                       gk104_fifo_engine_status(fifo, engn, &status);
+                       if (status.faulted)
+                               break;
+               );
+
+               /* Release MMU fault trigger, and ACK the fault. */
+               nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000000);
+               nvkm_wr32(device, 0x00259c, BIT(mmui));
+               nvkm_wr32(device, 0x002100, 0x10000000);
+       }
+
+       /* Schedule recovery. */
+       nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
+       schedule_work(&fifo->recover.work);
+}
+
+static void
+tu102_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info)
+{
+       struct gk104_fifo *fifo = gk104_fifo(base);
+       struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+       struct nvkm_device *device = subdev->device;
+       const struct nvkm_enum *er, *ee, *ec, *ea;
+       struct nvkm_engine *engine = NULL;
+       struct nvkm_fifo_chan *chan;
+       unsigned long flags;
+       char ct[8] = "HUB/", en[16] = "";
+       int engn;
+
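+       /* Decode the fault information into human-readable form. */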
+       er = nvkm_enum_find(fifo->func->fault.reason, info->reason);
+       ee = nvkm_enum_find(fifo->func->fault.engine, info->engine);
+       if (info->hub) {
+               ec = nvkm_enum_find(fifo->func->fault.hubclient, info->client);
+       } else {
+               ec = nvkm_enum_find(fifo->func->fault.gpcclient, info->client);
+               snprintf(ct, sizeof(ct), "GPC%d/", info->gpc);
+       }
+       ea = nvkm_enum_find(fifo->func->fault.access, info->access);
+
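+       /* Handle faults from units needing special treatment; for anything
+        * else, resolve the engine that will need recovery.
+        */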
+       if (ee && ee->data2) {
+               switch (ee->data2) {
+               case NVKM_SUBDEV_BAR:
+                       nvkm_bar_bar1_reset(device);
+                       break;
+               case NVKM_SUBDEV_INSTMEM:
+                       nvkm_bar_bar2_reset(device);
+                       break;
+               case NVKM_ENGINE_IFB:
+                       nvkm_mask(device, 0x001718, 0x00000000, 0x00000000);
+                       break;
+               default:
+                       engine = nvkm_device_engine(device, ee->data2);
+                       break;
+               }
+       }
+
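+       /* Without a matching enum entry, ask PTOP which engine owns this
+        * fault ID, and upper-case its name for the error message.
+        */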
+       if (!ee) {
+               enum nvkm_devidx engidx = nvkm_top_fault(device, info->engine);
+
+               if (engidx < NVKM_SUBDEV_NR) {
+                       const char *src = nvkm_subdev_name[engidx];
+                       char *dst = en;
+
+                       do {
+                               *dst++ = toupper(*src++);
+                       } while (*src);
+                       engine = nvkm_device_engine(device, engidx);
+               }
+       } else {
+               snprintf(en, sizeof(en), "%s", ee->name);
+       }
+
+       spin_lock_irqsave(&fifo->base.lock, flags);
+       chan = nvkm_fifo_chan_inst_locked(&fifo->base, info->inst);
+
+       nvkm_error(subdev,
+                  "fault %02x [%s] at %016llx engine %02x [%s] client %02x "
+                  "[%s%s] reason %02x [%s] on channel %d [%010llx %s]\n",
+                  info->access, ea ? ea->name : "", info->addr,
+                  info->engine, ee ? ee->name : en,
+                  info->client, ct, ec ? ec->name : "",
+                  info->reason, er ? er->name : "", chan ? chan->chid : -1,
+                  info->inst, chan ? chan->object.client->name : "unknown");
+
+       /* Kill the channel that caused the fault. */
+       if (chan)
+               tu102_fifo_recover_chan(&fifo->base, chan->chid);
+
+       /* Channel recovery will probably have already done this for the
+        * correct engine(s), but just in case we can't find the channel
+        * information...
+        */
+       for (engn = 0; engn < fifo->engine_nr && engine; engn++) {
+               if (fifo->engine[engn].engine == engine) {
+                       tu102_fifo_recover_engn(fifo, engn);
+                       break;
+               }
+       }
+
+       spin_unlock_irqrestore(&fifo->base.lock, flags);
+}
+
+static const struct nvkm_enum
+tu102_fifo_sched_reason[] = {
+       { 0x0a, "CTXSW_TIMEOUT" },
+       {}
+};
+
+static void
+tu102_fifo_intr_sched_ctxsw(struct gk104_fifo *fifo)
+{
+       struct nvkm_device *device = fifo->base.engine.subdev.device;
+       unsigned long flags, engm = 0;
+       u32 engn;
+
+       /* We need to ACK the SCHED_ERROR here, and prevent it reasserting,
+        * as MMU_FAULT cannot be triggered while it's pending.
+        */
+       spin_lock_irqsave(&fifo->base.lock, flags);
+       nvkm_mask(device, 0x002140, 0x00000100, 0x00000000);
+       nvkm_wr32(device, 0x002100, 0x00000100);
+
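+       /* Determine which engines are stuck mid-channel-switch. */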
+       for (engn = 0; engn < fifo->engine_nr; engn++) {
+               struct gk104_fifo_engine_status status;
+
+               gk104_fifo_engine_status(fifo, engn, &status);
+               if (!status.busy || !status.chsw)
+                       continue;
+
+               engm |= BIT(engn);
+       }
+
+       for_each_set_bit(engn, &engm, fifo->engine_nr)
+               tu102_fifo_recover_engn(fifo, engn);
+
+       nvkm_mask(device, 0x002140, 0x00000100, 0x00000100);
+       spin_unlock_irqrestore(&fifo->base.lock, flags);
+}
+
+static void
+tu102_fifo_intr_sched(struct gk104_fifo *fifo)
+{
+       struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+       struct nvkm_device *device = subdev->device;
+       u32 intr = nvkm_rd32(device, 0x00254c);
+       u32 code = intr & 0x000000ff;
+       const struct nvkm_enum *en =
+               nvkm_enum_find(tu102_fifo_sched_reason, code);
+
+       nvkm_error(subdev, "SCHED_ERROR %02x [%s]\n", code, en ? en->name : "");
+
+       switch (code) {
+       case 0x0a:
+               tu102_fifo_intr_sched_ctxsw(fifo);
+               break;
+       default:
+               break;
+       }
+}
+
+static void
+tu102_fifo_intr(struct nvkm_fifo *base)
+{
+       struct gk104_fifo *fifo = gk104_fifo(base);
+       struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+       struct nvkm_device *device = subdev->device;
+       u32 mask = nvkm_rd32(device, 0x002140);
+       u32 stat = nvkm_rd32(device, 0x002100) & mask;
+
+       if (stat & 0x00000001) {
+               gk104_fifo_intr_bind(fifo);
+               nvkm_wr32(device, 0x002100, 0x00000001);
+               stat &= ~0x00000001;
+       }
+
+       if (stat & 0x00000010) {
+               nvkm_error(subdev, "PIO_ERROR\n");
+               nvkm_wr32(device, 0x002100, 0x00000010);
+               stat &= ~0x00000010;
+       }
+
+       if (stat & 0x00000100) {
+               tu102_fifo_intr_sched(fifo);
+               nvkm_wr32(device, 0x002100, 0x00000100);
+               stat &= ~0x00000100;
+       }
+
+       if (stat & 0x00010000) {
+               gk104_fifo_intr_chsw(fifo);
+               nvkm_wr32(device, 0x002100, 0x00010000);
+               stat &= ~0x00010000;
+       }
+
+       if (stat & 0x00800000) {
+               nvkm_error(subdev, "FB_FLUSH_TIMEOUT\n");
+               nvkm_wr32(device, 0x002100, 0x00800000);
+               stat &= ~0x00800000;
+       }
+
+       if (stat & 0x01000000) {
+               nvkm_error(subdev, "LB_ERROR\n");
+               nvkm_wr32(device, 0x002100, 0x01000000);
+               stat &= ~0x01000000;
+       }
+
+       if (stat & 0x08000000) {
+               gk104_fifo_intr_dropped_fault(fifo);
+               nvkm_wr32(device, 0x002100, 0x08000000);
+               stat &= ~0x08000000;
+       }
+
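+       /* MMU fault: handle each pending fault unit. */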
+       if (stat & 0x10000000) {
+               u32 mask = nvkm_rd32(device, 0x00259c);
+
+               while (mask) {
+                       u32 unit = __ffs(mask);
+                       fifo->func->intr.fault(&fifo->base, unit);
+                       nvkm_wr32(device, 0x00259c, (1 << unit));
+                       mask &= ~(1 << unit);
+               }
+               stat &= ~0x10000000;
+       }
+
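+       /* PBDMA interrupts: handle each pending unit. */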
+       if (stat & 0x20000000) {
+               u32 mask = nvkm_rd32(device, 0x0025a0);
+
+               while (mask) {
+                       u32 unit = __ffs(mask);
+
+                       gk104_fifo_intr_pbdma_0(fifo, unit);
+                       gk104_fifo_intr_pbdma_1(fifo, unit);
+                       nvkm_wr32(device, 0x0025a0, (1 << unit));
+                       mask &= ~(1 << unit);
+               }
+               stat &= ~0x20000000;
+       }
+
+       if (stat & 0x40000000) {
+               gk104_fifo_intr_runlist(fifo);
+               stat &= ~0x40000000;
+       }
+
+       if (stat & 0x80000000) {
+               nvkm_wr32(device, 0x002100, 0x80000000);
+               gk104_fifo_intr_engine(fifo);
+               stat &= ~0x80000000;
+       }
+
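+       /* Disable and ACK any interrupt bits we don't recognise. */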
+       if (stat) {
+               nvkm_error(subdev, "INTR %08x\n", stat);
+               nvkm_mask(device, 0x002140, stat, 0x00000000);
+               nvkm_wr32(device, 0x002100, stat);
+       }
+}
+
+static const struct nvkm_fifo_func
+tu102_fifo_ = {
+       .dtor = gk104_fifo_dtor,
+       .oneinit = gk104_fifo_oneinit,
+       .info = gk104_fifo_info,
+       .init = gk104_fifo_init,
+       .fini = gk104_fifo_fini,
+       .intr = tu102_fifo_intr,
+       .fault = tu102_fifo_fault,
+       .uevent_init = gk104_fifo_uevent_init,
+       .uevent_fini = gk104_fifo_uevent_fini,
+       .recover_chan = tu102_fifo_recover_chan,
+       .class_get = gk104_fifo_class_get,
+       .class_new = gk104_fifo_class_new,
+};
+
 int
 tu102_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo)
 {
-       return gk104_fifo_new_(&tu102_fifo, device, index, 4096, pfifo);
+       struct gk104_fifo *fifo;
+
+       if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
+               return -ENOMEM;
+       fifo->func = &tu102_fifo;
+       INIT_WORK(&fifo->recover.work, tu102_fifo_recover_work);
+       *pfifo = &fifo->base;
+
+       return nvkm_fifo_ctor(&tu102_fifo_, device, index, 4096, &fifo->base);
 }