.init = host1x01_init,
        .sync_offset = 0x3000,
        .dma_mask = DMA_BIT_MASK(32),
+       .has_wide_gather = false,
        .has_hypervisor = false,
        .num_sid_entries = 0,
        .sid_table = NULL,
        .init = host1x02_init,
        .sync_offset = 0x3000,
        .dma_mask = DMA_BIT_MASK(32),
+       .has_wide_gather = false,
        .has_hypervisor = false,
        .num_sid_entries = 0,
        .sid_table = NULL,
        .init = host1x04_init,
        .sync_offset = 0x2100,
        .dma_mask = DMA_BIT_MASK(34),
+       .has_wide_gather = false,
        .has_hypervisor = false,
        .num_sid_entries = 0,
        .sid_table = NULL,
        .init = host1x05_init,
        .sync_offset = 0x2100,
        .dma_mask = DMA_BIT_MASK(34),
+       .has_wide_gather = false,
        .has_hypervisor = false,
        .num_sid_entries = 0,
        .sid_table = NULL,
        .init = host1x06_init,
        .sync_offset = 0x0,
        .dma_mask = DMA_BIT_MASK(40),
+       .has_wide_gather = true,
        .has_hypervisor = true,
        .num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
        .sid_table = tegra186_sid_table,
        .init = host1x07_init,
        .sync_offset = 0x0,
        .dma_mask = DMA_BIT_MASK(40),
+       .has_wide_gather = true,
        .has_hypervisor = true,
        .num_sid_entries = ARRAY_SIZE(tegra194_sid_table),
        .sid_table = tegra194_sid_table,
        }
 }
 
+/*
+ * host1x_iommu_attach() - set up explicit IOMMU usage for host1x
+ * @host: host1x device data
+ *
+ * Returns the IOMMU domain host1x ends up using (either the DMA API's
+ * pre-existing domain or a newly allocated one), NULL when host1x is not
+ * behind an IOMMU, or an ERR_PTR()-encoded error code on failure.
+ */
+static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
+{
+       struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
+       int err;
+
+       /*
+        * If the host1x firewall is enabled, there's no need to enable IOMMU
+        * support. Similarly, if host1x is already attached to an IOMMU (via
+        * the DMA API), don't try to attach again.
+        */
+       if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) || domain)
+               return domain;
+
+       host->group = iommu_group_get(host->dev);
+       if (host->group) {
+               struct iommu_domain_geometry *geometry;
+               dma_addr_t start, end;
+               unsigned long order;
+
+               err = iova_cache_get();
+               if (err < 0)
+                       goto put_group;
+
+               host->domain = iommu_domain_alloc(&platform_bus_type);
+               if (!host->domain) {
+                       err = -ENOMEM;
+                       goto put_cache;
+               }
+
+               err = iommu_attach_group(host->domain, host->group);
+               if (err) {
+                       /*
+                        * -ENODEV means no IOMMU translation is available
+                        * for this device, so treat it as "no IOMMU" rather
+                        * than a hard failure: with err forced to 0 the
+                        * ERR_PTR(err) below evaluates to NULL.
+                        */
+                       if (err == -ENODEV)
+                               err = 0;
+
+                       goto free_domain;
+               }
+
+               /* Clamp the usable IOVA range to what host1x can address. */
+               geometry = &host->domain->geometry;
+               start = geometry->aperture_start & host->info->dma_mask;
+               end = geometry->aperture_end & host->info->dma_mask;
+
+               /* IOVA granularity is the smallest supported IOMMU page size. */
+               order = __ffs(host->domain->pgsize_bitmap);
+               init_iova_domain(&host->iova, 1UL << order, start >> order);
+               host->iova_end = end;
+
+               domain = host->domain;
+       }
+
+       return domain;
+
+free_domain:
+       iommu_domain_free(host->domain);
+       host->domain = NULL;
+put_cache:
+       iova_cache_put();
+put_group:
+       iommu_group_put(host->group);
+       host->group = NULL;
+
+       return ERR_PTR(err);
+}
+
+/*
+ * host1x_iommu_init() - attach to an IOMMU (if available) and set the
+ * device's DMA mask accordingly
+ * @host: host1x device data
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+static int host1x_iommu_init(struct host1x *host)
+{
+       u64 mask = host->info->dma_mask;
+       struct iommu_domain *domain;
+       int err;
+
+       domain = host1x_iommu_attach(host);
+       if (IS_ERR(domain)) {
+               err = PTR_ERR(domain);
+               dev_err(host->dev, "failed to attach to IOMMU: %d\n", err);
+               return err;
+       }
+
+       /*
+        * If we're not behind an IOMMU make sure we don't get push buffers
+        * that are allocated outside of the range addressable by the GATHER
+        * opcode.
+        *
+        * Newer generations of Tegra (Tegra186 and later) support a wide
+        * variant of the GATHER opcode that allows addressing more bits.
+        */
+       if (!domain && !host->info->has_wide_gather)
+               mask = DMA_BIT_MASK(32);
+
+       err = dma_coerce_mask_and_coherent(host->dev, mask);
+       if (err < 0) {
+               dev_err(host->dev, "failed to set DMA mask: %d\n", err);
+               return err;
+       }
+
+       return 0;
+}
+
+/*
+ * host1x_iommu_exit() - undo host1x_iommu_attach()
+ * @host: host1x device data
+ *
+ * Tears down the explicitly created IOMMU domain, if any, in reverse order
+ * of setup; a no-op when host1x never attached explicitly (host->domain is
+ * NULL in that case).
+ */
+static void host1x_iommu_exit(struct host1x *host)
+{
+       if (host->domain) {
+               put_iova_domain(&host->iova);
+               iommu_detach_group(host->domain, host->group);
+
+               iommu_domain_free(host->domain);
+               host->domain = NULL;
+
+               iova_cache_put();
+
+               iommu_group_put(host->group);
+               host->group = NULL;
+       }
+}
+
 static int host1x_probe(struct platform_device *pdev)
 {
        struct host1x *host;
        host->dev->dma_parms = &host->dma_parms;
        dma_set_max_seg_size(host->dev, UINT_MAX);
 
-       dma_set_mask_and_coherent(host->dev, host->info->dma_mask);
-
        if (host->info->init) {
                err = host->info->init(host);
                if (err)
                return err;
        }
 
-       if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
-               goto skip_iommu;
-
-       if (iommu_get_domain_for_dev(&pdev->dev))
-               goto skip_iommu;
-
-       host->group = iommu_group_get(&pdev->dev);
-       if (host->group) {
-               struct iommu_domain_geometry *geometry;
-               u64 mask = dma_get_mask(host->dev);
-               dma_addr_t start, end;
-               unsigned long order;
-
-               err = iova_cache_get();
-               if (err < 0)
-                       goto put_group;
-
-               host->domain = iommu_domain_alloc(&platform_bus_type);
-               if (!host->domain) {
-                       err = -ENOMEM;
-                       goto put_cache;
-               }
-
-               err = iommu_attach_group(host->domain, host->group);
-               if (err) {
-                       if (err == -ENODEV) {
-                               iommu_domain_free(host->domain);
-                               host->domain = NULL;
-                               iova_cache_put();
-                               iommu_group_put(host->group);
-                               host->group = NULL;
-                               goto skip_iommu;
-                       }
-
-                       goto fail_free_domain;
-               }
-
-               geometry = &host->domain->geometry;
-               start = geometry->aperture_start & mask;
-               end = geometry->aperture_end & mask;
-
-               order = __ffs(host->domain->pgsize_bitmap);
-               init_iova_domain(&host->iova, 1UL << order, start >> order);
-               host->iova_end = end;
+       /* Set up the IOMMU (optional) and the device's DMA mask. */
+       err = host1x_iommu_init(host);
+       if (err < 0) {
+               dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
+               return err;
        }
 
-skip_iommu:
        err = host1x_channel_list_init(&host->channel_list,
                                       host->info->nb_channels);
        if (err) {
                dev_err(&pdev->dev, "failed to initialize channel list\n");
-               goto fail_detach_device;
+               goto iommu_exit;
        }
 
        err = clk_prepare_enable(host->clk);
        if (err < 0) {
                dev_err(&pdev->dev, "failed to enable clock\n");
-               goto fail_free_channels;
+               goto free_channels;
        }
 
        err = reset_control_deassert(host->rst);
        if (err < 0) {
                dev_err(&pdev->dev, "failed to deassert reset: %d\n", err);
-               goto fail_unprepare_disable;
+               goto unprepare_disable;
        }
 
        err = host1x_syncpt_init(host);
        if (err) {
                dev_err(&pdev->dev, "failed to initialize syncpts\n");
-               goto fail_reset_assert;
+               goto reset_assert;
        }
 
        err = host1x_intr_init(host, syncpt_irq);
        if (err) {
                dev_err(&pdev->dev, "failed to initialize interrupts\n");
-               goto fail_deinit_syncpt;
+               goto deinit_syncpt;
        }
 
        host1x_debug_init(host);
 
        err = host1x_register(host);
        if (err < 0)
-               goto fail_deinit_intr;
+               goto deinit_intr;
 
        return 0;
 
-fail_deinit_intr:
+deinit_intr:
        host1x_intr_deinit(host);
-fail_deinit_syncpt:
+deinit_syncpt:
        host1x_syncpt_deinit(host);
-fail_reset_assert:
+reset_assert:
        reset_control_assert(host->rst);
-fail_unprepare_disable:
+unprepare_disable:
        clk_disable_unprepare(host->clk);
-fail_free_channels:
+free_channels:
        host1x_channel_list_free(&host->channel_list);
-fail_detach_device:
-       if (host->group && host->domain) {
-               put_iova_domain(&host->iova);
-               iommu_detach_group(host->domain, host->group);
-       }
-fail_free_domain:
-       if (host->domain)
-               iommu_domain_free(host->domain);
-put_cache:
-       if (host->group)
-               iova_cache_put();
-put_group:
-       iommu_group_put(host->group);
+iommu_exit:
+       host1x_iommu_exit(host);
 
        return err;
 }
        host1x_syncpt_deinit(host);
        reset_control_assert(host->rst);
        clk_disable_unprepare(host->clk);
-
-       if (host->domain) {
-               put_iova_domain(&host->iova);
-               iommu_detach_group(host->domain, host->group);
-               iommu_domain_free(host->domain);
-               iova_cache_put();
-               iommu_group_put(host->group);
-       }
+       host1x_iommu_exit(host);
 
        return 0;
 }