We weren't sending the high 32 bits of 64-bit bundle data, though they're currently zero anyway.
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
 
        if (gr->bundle_veid)
                gf100_gr_icmd(gr, gr->bundle_veid);
-       if (grctx->sw_veid_bundle_init)
+       else
                gf100_gr_icmd(gr, grctx->sw_veid_bundle_init);
 
+       if (gr->bundle64)
+               gf100_gr_icmd(gr, gr->bundle64);
+       else
+       if (grctx->sw_bundle64_init)
+               gf100_gr_icmd(gr, grctx->sw_bundle64_init);
+
        if (grctx->r400088) grctx->r400088(gr, true);
 
        nvkm_wr32(device, 0x404154, idle_timeout);
 
        const struct gf100_gr_pack *icmd;
        const struct gf100_gr_pack *mthd;
        const struct gf100_gr_pack *sw_veid_bundle_init;
+       const struct gf100_gr_pack *sw_bundle64_init;
        /* bundle circular buffer */
        void (*bundle)(struct gf100_gr_chan *, u64 addr, u32 size);
        u32 bundle_size;
 
 };
 
 static const struct gf100_gr_pack
-tu102_grctx_pack_sw_veid_bundle_init[] = {
-       { tu102_grctx_init_unknown_bundle_init_0 },
+tu102_grctx_pack_sw_bundle64_init[] = {
+       { tu102_grctx_init_unknown_bundle_init_0, .type = 64 },
        {}
 };
 
        .unkn88c = gv100_grctx_unkn88c,
        .main = gf100_grctx_generate_main,
        .unkn = gv100_grctx_generate_unkn,
-       .sw_veid_bundle_init = tu102_grctx_pack_sw_veid_bundle_init,
+       .sw_bundle64_init = tu102_grctx_pack_sw_bundle64_init,
        .bundle = gm107_grctx_generate_bundle,
        .bundle_size = 0x3000,
        .bundle_min_gpm_fifo_depth = 0x180,
 
        struct nvkm_device *device = gr->base.engine.subdev.device;
        const struct gf100_gr_pack *pack;
        const struct gf100_gr_init *init;
-       u32 data = 0;
+       u64 data = 0;
 
        nvkm_wr32(device, 0x400208, 0x80000000);
 
 
                if ((pack == p && init == p->init) || data != init->data) {
                        nvkm_wr32(device, 0x400204, init->data);
+                       if (pack->type == 64)
+                               nvkm_wr32(device, 0x40020c, upper_32_bits(init->data));
                        data = init->data;
                }
 
        nvkm_blob_dtor(&gr->gpccs.inst);
        nvkm_blob_dtor(&gr->gpccs.data);
 
+       vfree(gr->bundle64);
        vfree(gr->bundle_veid);
        vfree(gr->bundle);
        vfree(gr->method);
 
        struct gf100_gr_pack *sw_ctx;
        struct gf100_gr_pack *bundle;
        struct gf100_gr_pack *bundle_veid;
+       struct gf100_gr_pack *bundle64;
        struct gf100_gr_pack *method;
 
        struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_COLOR_CNT];
        u32 addr;
        u8  count;
        u32 pitch;
-       u32 data;
+       u64 data;
 };
 
 struct gf100_gr_pack {