.unset_window = pnv_npu_unset_window,
        .take_ownership = pnv_npu_take_ownership,
 };
-
-struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
-{
-       struct pnv_phb *phb = npe->phb;
-       struct pci_bus *pbus = phb->hose->bus;
-       struct pci_dev *npdev, *gpdev = NULL, *gptmp;
-       struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
-
-       if (!gpe || !gpdev)
-               return NULL;
-
-       npe->table_group.ops = &pnv_pci_npu_ops;
-
-       list_for_each_entry(npdev, &pbus->devices, bus_list) {
-               gptmp = pnv_pci_get_gpu_dev(npdev);
-
-               if (gptmp != gpdev)
-                       continue;
-
-               pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
-               iommu_group_add_device(gpe->table_group.group, &npdev->dev);
-       }
-
-       return gpe;
-}
 #endif /* !CONFIG_IOMMU_API */
 
 /*
  */
 /* Maximum possible number of ATSD MMIO registers per NPU */
 #define NV_NMMU_ATSD_REGS 8
+#define NV_NPU_MAX_PE_NUM      16
+
+/*
+ * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
+ * up to 3 x (GPU + 2xNPUs) (POWER9).
+ */
+struct npu_comp {
+       /* Table group that gets registered as the IOMMU group of the compound */
+       struct iommu_table_group table_group;
+       /* Number of valid entries in pe[] */
+       int pe_num;
+       /* Member PEs; group operations are forwarded to each of them */
+       struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
+};
 
 /* An NPU descriptor, valid for POWER9 only */
 struct npu {
 
        /* Do we need to explicitly flush the nest mmu? */
        bool nmmu_flush;
+
+       struct npu_comp npucomp;
 };
 
+#ifdef CONFIG_IOMMU_API
+/*
+ * Creates a TCE table for the compound group by delegating to the
+ * create_table hook of the first attached PE.
+ *
+ * Returns the hook's result, or -EFAULT when no PE is attached yet or the
+ * first PE has no create_table hook.
+ */
+static long pnv_npu_peers_create_table_userspace(
+               struct iommu_table_group *table_group,
+               int num, __u32 page_shift, __u64 window_size, __u32 levels,
+               struct iommu_table **ptbl)
+{
+       struct npu_comp *comp = container_of(table_group, struct npu_comp,
+                       table_group);
+       struct pnv_ioda_pe *first;
+
+       if (!comp->pe_num)
+               return -EFAULT;
+
+       first = comp->pe[0];
+       if (!first || !first->table_group.ops ||
+                       !first->table_group.ops->create_table)
+               return -EFAULT;
+
+       return first->table_group.ops->create_table(&first->table_group, num,
+                       page_shift, window_size, levels, ptbl);
+}
+
+/*
+ * Programs window @num with @tbl on every PE of the compound group.
+ *
+ * PEs without a set_window hook are skipped. If any PE fails, the window is
+ * removed again from the PEs that were already programmed and the original
+ * error is returned. On success a reference to @tbl is taken and stored in
+ * the compound group's tables[@num].
+ *
+ * Returns 0 on success or the error of the first failing set_window hook.
+ */
+static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
+               int num, struct iommu_table *tbl)
+{
+       int i, j;
+       long ret = 0;
+       struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+                       table_group);
+
+       for (i = 0; i < npucomp->pe_num; ++i) {
+               struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+               if (!pe->table_group.ops->set_window)
+                       continue;
+
+               ret = pe->table_group.ops->set_window(&pe->table_group,
+                               num, tbl);
+               if (ret)
+                       break;
+       }
+
+       if (!ret) {
+               table_group->tables[num] = iommu_tce_table_get(tbl);
+               return 0;
+       }
+
+       /*
+        * Roll back PEs [0, i). The rollback status must not overwrite @ret:
+        * a clean rollback would otherwise make the failed call return 0.
+        */
+       for (j = 0; j < i; ++j) {
+               struct pnv_ioda_pe *pe = npucomp->pe[j];
+
+               if (!pe->table_group.ops->unset_window)
+                       continue;
+
+               if (pe->table_group.ops->unset_window(&pe->table_group, num))
+                       break;
+       }
+
+       return ret;
+}
+
+/*
+ * Removes window @num from every PE of the compound group.
+ *
+ * Does nothing when the compound group has no table in tables[@num]. PEs
+ * without an unset_window hook are skipped. If any PE fails, the window is
+ * programmed back on the PEs that were already cleared and the original
+ * error is returned. On success the reference held in tables[@num] is
+ * dropped and the slot is cleared.
+ *
+ * Returns 0 on success or the error of the first failing unset_window hook.
+ */
+static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
+               int num)
+{
+       int i, j;
+       long ret = 0;
+       struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+                       table_group);
+       struct iommu_table *tbl = table_group->tables[num];
+
+       /* No table recorded for this window, so there is nothing to undo */
+       if (!tbl)
+               return 0;
+
+       for (i = 0; i < npucomp->pe_num; ++i) {
+               struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+               if (!pe->table_group.ops->unset_window)
+                       continue;
+
+               ret = pe->table_group.ops->unset_window(&pe->table_group, num);
+               if (ret)
+                       break;
+       }
+
+       if (!ret) {
+               iommu_tce_table_put(tbl);
+               table_group->tables[num] = NULL;
+               return 0;
+       }
+
+       /*
+        * Restore the window on PEs [0, i). The restore status must not
+        * overwrite @ret: a clean restore would otherwise hide the failure.
+        */
+       for (j = 0; j < i; ++j) {
+               struct pnv_ioda_pe *pe = npucomp->pe[j];
+
+               if (!pe->table_group.ops->set_window)
+                       continue;
+
+               if (pe->table_group.ops->set_window(&pe->table_group, num,
+                                       tbl))
+                       break;
+       }
+
+       return ret;
+}
+
+/*
+ * Forwards take_ownership to every PE of the compound group that
+ * implements the hook.
+ */
+static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
+{
+       struct npu_comp *comp = container_of(table_group, struct npu_comp,
+                       table_group);
+       int idx;
+
+       for (idx = 0; idx < comp->pe_num; ++idx) {
+               struct iommu_table_group *member =
+                               &comp->pe[idx]->table_group;
+
+               if (member->ops->take_ownership)
+                       member->ops->take_ownership(member);
+       }
+}
+
+/*
+ * Forwards release_ownership to every PE of the compound group that
+ * implements the hook.
+ */
+static void pnv_npu_peers_release_ownership(
+               struct iommu_table_group *table_group)
+{
+       struct npu_comp *comp = container_of(table_group, struct npu_comp,
+                       table_group);
+       int idx;
+
+       for (idx = 0; idx < comp->pe_num; ++idx) {
+               struct iommu_table_group *member =
+                               &comp->pe[idx]->table_group;
+
+               if (member->ops->release_ownership)
+                       member->ops->release_ownership(member);
+       }
+}
+
+/*
+ * Operations of a compound (GPU + NPUs) table group. Apart from
+ * get_table_size, every hook is a wrapper that forwards the request to the
+ * PEs attached to the compound.
+ */
+static struct iommu_table_group_ops pnv_npu_peers_ops = {
+       .get_table_size = pnv_pci_ioda2_get_table_size,
+       .create_table = pnv_npu_peers_create_table_userspace,
+       .set_window = pnv_npu_peers_set_window,
+       .unset_window = pnv_npu_peers_unset_window,
+       .take_ownership = pnv_npu_peers_take_ownership,
+       .release_ownership = pnv_npu_peers_release_ownership,
+};
+
+/*
+ * Appends @pe to the member list of @npucomp. Warns and leaves the list
+ * untouched when all NV_NPU_MAX_PE_NUM slots are already in use.
+ */
+static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
+               struct pnv_ioda_pe *pe)
+{
+       int slot = npucomp->pe_num;
+
+       if (WARN_ON(slot == NV_NPU_MAX_PE_NUM))
+               return;
+
+       npucomp->pe[slot] = pe;
+       npucomp->pe_num = slot + 1;
+}
+
+/*
+ * Puts @pe under a compound (GPU + NPUs) table group if any device on the
+ * PE's bus has an NPU linked to it.
+ *
+ * When the NPU's host controller has an NPU descriptor, its embedded
+ * compound is used (and registered as an IOMMU group on first use);
+ * otherwise a compound is allocated for this PE alone. The compound inherits
+ * the window capabilities of @pe and @pe is recorded as a member.
+ *
+ * Returns the compound table group, or NULL when @pe has no bus, no device
+ * on the bus is attached to an NPU, or the compound cannot be allocated.
+ */
+struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
+{
+       struct iommu_table_group *table_group;
+       struct npu_comp *npucomp;
+       struct pci_dev *gpdev = NULL;
+       struct pci_controller *hose;
+       struct pci_dev *npdev = NULL;
+
+       /* Without a bus there is no device list to scan for NPU links */
+       if (!pe->pbus)
+               return NULL;
+
+       list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
+               npdev = pnv_pci_get_npu_dev(gpdev, 0);
+               if (npdev)
+                       break;
+       }
+
+       if (!npdev)
+               /* It is not an NPU attached device, skip */
+               return NULL;
+
+       hose = pci_bus_to_host(npdev->bus);
+
+       if (hose->npu) {
+               table_group = &hose->npu->npucomp.table_group;
+
+               if (!table_group->group) {
+                       table_group->ops = &pnv_npu_peers_ops;
+                       iommu_register_group(table_group,
+                                       hose->global_number,
+                                       pe->pe_number);
+               }
+       } else {
+               /* Create a group for 1 GPU and attached NPUs for POWER8 */
+               /* Size by the pointed-to struct, not by the pointer itself */
+               pe->npucomp = kzalloc(sizeof(*pe->npucomp), GFP_KERNEL);
+               if (!pe->npucomp)
+                       return NULL;
+
+               table_group = &pe->npucomp->table_group;
+               table_group->ops = &pnv_npu_peers_ops;
+               iommu_register_group(table_group, hose->global_number,
+                               pe->pe_number);
+       }
+
+       /* Steal capabilities from a GPU PE */
+       table_group->max_dynamic_windows_supported =
+               pe->table_group.max_dynamic_windows_supported;
+       table_group->tce32_start = pe->table_group.tce32_start;
+       table_group->tce32_size = pe->table_group.tce32_size;
+       table_group->max_levels = pe->table_group.max_levels;
+       if (!table_group->pgsizes)
+               table_group->pgsizes = pe->table_group.pgsizes;
+
+       npucomp = container_of(table_group, struct npu_comp, table_group);
+       pnv_comp_attach_table_group(npucomp, pe);
+
+       return table_group;
+}
+
+/*
+ * Attaches the NVLink bridge PE @pe to the compound table group of the GPU
+ * it is linked to, and adds every NPU device linked to that GPU to the
+ * compound's IOMMU group.
+ *
+ * Returns the compound table group, or NULL when no GPU PE/device is found
+ * for @pe or the GPU is not in a compound group.
+ */
+struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
+{
+       struct iommu_table_group *table_group;
+       struct iommu_group *grp;
+       struct npu_comp *npucomp;
+       struct pci_dev *gpdev = NULL;
+       struct pci_dev *npdev;
+       struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
+
+       WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
+       if (!gpe || !gpdev)
+               return NULL;
+
+       /*
+        * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
+        * but NPU bridges do not have this hook defined so we do it here.
+        * We do not setup other table group parameters as they won't be used
+        * anyway - NVLink bridges are subordinate PEs.
+        */
+       pe->table_group.ops = &pnv_pci_npu_ops;
+
+       /*
+        * iommu_group_get() takes a reference on the group; it is only
+        * needed for the lookup, so drop it right away instead of leaking it.
+        */
+       grp = iommu_group_get(&gpdev->dev);
+       if (!grp)
+               return NULL;
+       table_group = iommu_group_get_iommudata(grp);
+       iommu_group_put(grp);
+
+       /*
+        * container_of() below is only valid for a table group embedded in
+        * a struct npu_comp; those are the ones using pnv_npu_peers_ops.
+        */
+       if (!table_group || table_group->ops != &pnv_npu_peers_ops)
+               return NULL;
+
+       /*
+        * On P9 NPU PHB and PCI PHB support different page sizes,
+        * keep only matching. We expect here that NVLink bridge PE pgsizes is
+        * initialized by the caller.
+        */
+       table_group->pgsizes &= pe->table_group.pgsizes;
+       npucomp = container_of(table_group, struct npu_comp, table_group);
+       pnv_comp_attach_table_group(npucomp, pe);
+
+       list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
+               struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
+
+               if (gpdevtmp != gpdev)
+                       continue;
+
+               iommu_add_device(table_group, &npdev->dev);
+       }
+
+       return table_group;
+}
+#endif /* CONFIG_IOMMU_API */
+
 /* Maximum number of nvlinks per npu */
 #define NV_MAX_LINKS 6
 
 
        unsigned int pe_num = pe->pe_number;
 
        WARN_ON(pe->pdev);
-
+       WARN_ON(pe->npucomp); /* NPUs are not supposed to be freed */
+       kfree(pe->npucomp);
        memset(pe, 0, sizeof(struct pnv_ioda_pe));
        clear_bit(pe_num, phb->ioda.pe_alloc);
 }
 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
                                       struct pnv_ioda_pe *pe);
 #ifdef CONFIG_IOMMU_API
-static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe);
+static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
+               struct iommu_table_group *table_group, struct pci_bus *bus);
+
 #endif
 static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 {
 
                pnv_pci_ioda2_setup_dma_pe(phb, pe);
 #ifdef CONFIG_IOMMU_API
-               pnv_ioda_setup_bus_iommu_group(pe);
+               pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL);
 #endif
        }
 }
 #endif
 
 #ifdef CONFIG_IOMMU_API
-static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
                __u64 window_size, __u32 levels)
 {
        unsigned long bytes = 0;
        .release_ownership = pnv_ioda2_release_ownership,
 };
 
-static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque)
-{
-       struct pci_controller *hose;
-       struct pnv_phb *phb;
-       struct pnv_ioda_pe **ptmppe = opaque;
-       struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
-       struct pci_dn *pdn = pci_get_pdn(pdev);
-
-       if (!pdn || pdn->pe_number == IODA_INVALID_PE)
-               return 0;
-
-       hose = pci_bus_to_host(pdev->bus);
-       phb = hose->private_data;
-       if (phb->type != PNV_PHB_NPU_NVLINK)
-               return 0;
-
-       *ptmppe = &phb->ioda.pe_array[pdn->pe_number];
-
-       return 1;
-}
-
-/*
- * This returns PE of associated NPU.
- * This assumes that NPU is in the same IOMMU group with GPU and there is
- * no other PEs.
- */
-static struct pnv_ioda_pe *gpe_table_group_to_npe(
-               struct iommu_table_group *table_group)
-{
-       struct pnv_ioda_pe *npe = NULL;
-       int ret = iommu_group_for_each_dev(table_group->group, &npe,
-                       gpe_table_group_to_npe_cb);
-
-       BUG_ON(!ret || !npe);
-
-       return npe;
-}
-
-static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group,
-               int num, struct iommu_table *tbl)
-{
-       struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
-       long ret = pnv_pci_ioda2_set_window(table_group, num, tbl);
-
-       if (ret)
-               return ret;
-
-       ret = npe->table_group.ops->set_window(&npe->table_group, num, tbl);
-       if (ret)
-               pnv_pci_ioda2_unset_window(table_group, num);
-
-       return ret;
-}
-
-static long pnv_pci_ioda2_npu_unset_window(
-               struct iommu_table_group *table_group,
-               int num)
-{
-       struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
-       long ret = pnv_pci_ioda2_unset_window(table_group, num);
-
-       if (ret)
-               return ret;
-
-       return npe->table_group.ops->unset_window(&npe->table_group, num);
-}
-
-static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
-{
-       struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
-
-       npe->table_group.ops->take_ownership(&npe->table_group);
-       pnv_ioda2_take_ownership(table_group);
-}
-
-static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = {
-       .get_table_size = pnv_pci_ioda2_get_table_size,
-       .create_table = pnv_pci_ioda2_create_table_userspace,
-       .set_window = pnv_pci_ioda2_npu_set_window,
-       .unset_window = pnv_pci_ioda2_npu_unset_window,
-       .take_ownership = pnv_ioda2_npu_take_ownership,
-       .release_ownership = pnv_ioda2_release_ownership,
-};
-
+/*
+ * Adds every PCI device on @bus to @table_group. When @pe has
+ * PNV_IODA_PE_BUS_ALL set, the devices behind subordinate buses are added
+ * as well, recursively.
+ */
 static void pnv_ioda_setup_bus_iommu_group_add_devices(struct pnv_ioda_pe *pe,
+               struct iommu_table_group *table_group,
                struct pci_bus *bus)
 {
        struct pci_dev *dev;
 
        list_for_each_entry(dev, &bus->devices, bus_list) {
-               iommu_add_device(&pe->table_group, &dev->dev);
+               iommu_add_device(table_group, &dev->dev);
 
                if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
                        pnv_ioda_setup_bus_iommu_group_add_devices(pe,
-                                       dev->subordinate);
+                                       table_group, dev->subordinate);
        }
 }
 
-static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe)
+/*
+ * Adds the devices of @pe to @table_group: the PE's own device when the PE
+ * is flagged PNV_IODA_PE_DEV, and the devices on @bus when the PE is
+ * bus-based or a @bus is supplied. The group itself is expected to be
+ * registered by the caller.
+ */
+static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
+               struct iommu_table_group *table_group, struct pci_bus *bus)
 {
-       if (!pnv_pci_ioda_pe_dma_weight(pe))
-               return;
-
-       iommu_register_group(&pe->table_group, pe->phb->hose->global_number,
-                       pe->pe_number);
 
-       /*
-        * set_iommu_table_base(&pe->pdev->dev, tbl) should have been called
-        * by now
-        */
        if (pe->flags & PNV_IODA_PE_DEV)
-               iommu_add_device(&pe->table_group, &pe->pdev->dev);
-       else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
-               pnv_ioda_setup_bus_iommu_group_add_devices(pe, pe->pbus);
+               iommu_add_device(table_group, &pe->pdev->dev);
+
+       /*
+        * NOTE(review): @bus is handed to the helper unchecked and the helper
+        * dereferences it; a PE with a BUS flag and a NULL @bus would oops -
+        * confirm callers never combine the two.
+        */
+       if ((pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) || bus)
+               pnv_ioda_setup_bus_iommu_group_add_devices(pe, table_group,
+                               bus);
 }
 
+static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
+
 static void pnv_pci_ioda_setup_iommu_api(void)
 {
-       struct pci_controller *hose, *tmp;
+       struct pci_controller *hose;
        struct pnv_phb *phb;
-       struct pnv_ioda_pe *pe, *gpe;
+       struct pnv_ioda_pe *pe;
 
        /*
         * There are 4 types of PEs:
                if (phb->type == PNV_PHB_NPU_NVLINK)
                        continue;
 
-               list_for_each_entry(pe, &phb->ioda.pe_list, list)
-                       pnv_ioda_setup_bus_iommu_group(pe);
+               list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+                       struct iommu_table_group *table_group;
+
+                       table_group = pnv_try_setup_npu_table_group(pe);
+                       if (!table_group) {
+                               if (!pnv_pci_ioda_pe_dma_weight(pe))
+                                       continue;
+
+                               table_group = &pe->table_group;
+                               iommu_register_group(&pe->table_group,
+                                               pe->phb->hose->global_number,
+                                               pe->pe_number);
+                       }
+                       pnv_ioda_setup_bus_iommu_group(pe, table_group,
+                                       pe->pbus);
+               }
        }
 
        /*
         * Now we have all PHBs discovered, time to add NPU devices to
         * the corresponding IOMMU groups.
         */
-       list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+       list_for_each_entry(hose, &hose_list, list_node) {
+               unsigned long  pgsizes;
+
                phb = hose->private_data;
 
                if (phb->type != PNV_PHB_NPU_NVLINK)
                        continue;
 
+               pgsizes = pnv_ioda_parse_tce_sizes(phb);
                list_for_each_entry(pe, &phb->ioda.pe_list, list) {
-                       gpe = pnv_pci_npu_setup_iommu(pe);
-                       if (gpe)
-                               gpe->table_group.ops = &pnv_pci_ioda2_npu_ops;
+                       /*
+                        * IODA2 bridges get this set up from
+                        * pci_controller_ops::setup_bridge but NPU bridges
+                        * do not have this hook defined so we do it here.
+                        */
+                       pe->table_group.pgsizes = pgsizes;
+                       pnv_npu_compound_attach(pe);
                }
        }
 }