#include <asm/tce.h>
 #include <asm/xics.h>
 #include <asm/debug.h>
+#include <asm/firmware.h>
 
 #include "powernv.h"
 #include "pci.h"
                : : "r" (val), "r" (paddr) : "memory");
 }
 
+static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
+{
+       return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
+               (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+}
+
 static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
        unsigned long pe;
        clear_bit(pe, phb->ioda.pe_alloc);
 }
 
+/* The default M64 BAR is shared by all PEs */
+static int pnv_ioda2_init_m64(struct pnv_phb *phb)
+{
+       const char *desc;
+       struct resource *r;
+       s64 rc;
+
+       /* Configure the default M64 BAR */
+       rc = opal_pci_set_phb_mem_window(phb->opal_id,
+                                        OPAL_M64_WINDOW_TYPE,
+                                        phb->ioda.m64_bar_idx,
+                                        phb->ioda.m64_base,
+                                        0, /* unused */
+                                        phb->ioda.m64_size);
+       if (rc != OPAL_SUCCESS) {
+               desc = "configuring";
+               goto fail;
+       }
+
+       /* Enable the default M64 BAR */
+       rc = opal_pci_phb_mmio_enable(phb->opal_id,
+                                     OPAL_M64_WINDOW_TYPE,
+                                     phb->ioda.m64_bar_idx,
+                                     OPAL_ENABLE_M64_SPLIT);
+       if (rc != OPAL_SUCCESS) {
+               desc = "enabling";
+               goto fail;
+       }
+
+       /* Mark the M64 BAR assigned */
+       set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc);
+
+       /*
+        * Strip off the segment used by the reserved PE, which
+        * is expected to be either 0 or the last PE number.
+        */
+       r = &phb->hose->mem_resources[1];
+       if (phb->ioda.reserved_pe == 0)
+               r->start += phb->ioda.m64_segsize;
+       else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1))
+               r->end -= phb->ioda.m64_segsize;
+       else
+               pr_warn("  Cannot strip M64 segment for reserved PE#%d\n",
+                       phb->ioda.reserved_pe);
+
+       return 0;
+
+fail:
+       pr_warn("  Failure %lld %s M64 BAR#%d\n",
+               rc, desc, phb->ioda.m64_bar_idx);
+       opal_pci_phb_mmio_enable(phb->opal_id,
+                                OPAL_M64_WINDOW_TYPE,
+                                phb->ioda.m64_bar_idx,
+                                OPAL_DISABLE_M64);
+       return -EIO;
+}
+
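+/*
+ * Mark the M64 segments covered by the root ports' 64-bit
+ * prefetchable windows as allocated, so that those PE numbers
+ * are never handed out by pnv_ioda_alloc_pe().
+ */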
+static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb)
+{
+       resource_size_t sgsz = phb->ioda.m64_segsize;
+       struct pci_dev *pdev;
+       struct resource *r;
+       int base, step, i;
+
+       /*
+        * The root bus always covers the full M64 range, while the
+        * root ports cover the M64 ranges that are actually in use.
+        * So we check the root ports instead of the root bus.
+        */
+       list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) {
+               for (i = PCI_BRIDGE_RESOURCES;
+                    i <= PCI_BRIDGE_RESOURCE_END; i++) {
+                       r = &pdev->resource[i];
+                       if (!r->parent ||
+                           !pnv_pci_is_mem_pref_64(r->flags))
+                               continue;
+
+                       base = (r->start - phb->ioda.m64_base) / sgsz;
+                       for (step = 0; step < resource_size(r) / sgsz; step++)
+                               set_bit(base + step, phb->ioda.pe_alloc);
+               }
+       }
+}
+
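+/*
+ * Pick the PE numbers pinned by the bus's M64 window: the first
+ * one becomes the master PE and the others are chained to it as
+ * slaves, forming a compound PE. Returns the master PE number,
+ * or IODA_INVALID_PE when the bus doesn't use M64.
+ */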
+static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
+                                struct pci_bus *bus, int all)
+{
+       resource_size_t segsz = phb->ioda.m64_segsize;
+       struct pci_dev *pdev;
+       struct resource *r;
+       struct pnv_ioda_pe *master_pe, *pe;
+       unsigned long size, *pe_alloc;
+       bool found;
+       int start, i, j;
+
+       /* Root bus shouldn't use M64 */
+       if (pci_is_root_bus(bus))
+               return IODA_INVALID_PE;
+
+       /* We support only one M64 window on each bus */
+       found = false;
+       pci_bus_for_each_resource(bus, r, i) {
+               if (r && r->parent &&
+                   pnv_pci_is_mem_pref_64(r->flags)) {
+                       found = true;
+                       break;
+               }
+       }
+
+       /* No M64 window found? */
+       if (!found)
+               return IODA_INVALID_PE;
+
+       /* Allocate bitmap */
+       size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+       pe_alloc = kzalloc(size, GFP_KERNEL);
+       if (!pe_alloc) {
+               pr_warn("%s: Out of memory!\n", __func__);
+               return IODA_INVALID_PE;
+       }
+
+       /*
+        * Mark the PE numbers covered by the bus's M64 window
+        * as reserved.
+        */
+       start = (r->start - phb->ioda.m64_base) / segsz;
+       for (i = 0; i < resource_size(r) / segsz; i++)
+               set_bit(start + i, pe_alloc);
+
+       if (all)
+               goto done;
+
+       /*
+        * If the PE doesn't cover all subordinate buses, we need
+        * to subtract the segments reserved for its child buses.
+        */
+       list_for_each_entry(pdev, &bus->devices, bus_list) {
+               if (!pdev->subordinate)
+                       continue;
+
+               pci_bus_for_each_resource(pdev->subordinate, r, i) {
+                       if (!r || !r->parent ||
+                           !pnv_pci_is_mem_pref_64(r->flags))
+                               continue;
+
+                       start = (r->start - phb->ioda.m64_base) / segsz;
+                       for (j = 0; j < resource_size(r) / segsz; j++)
+                               clear_bit(start + j, pe_alloc);
+               }
+       }
+
+       /*
+        * The current bus might not own the M64 window at all; it
+        * may be entirely contributed by its child buses. In that
+        * case, we needn't pick an M64-dependent PE#.
+        */
+       if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
+               kfree(pe_alloc);
+               return IODA_INVALID_PE;
+       }
+
+       /*
+        * Figure out the master PE and put all slave PEs to master
+        * PE's list to form compound PE.
+        */
+done:
+       master_pe = NULL;
+       i = -1;
+       while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
+               phb->ioda.total_pe) {
+               pe = &phb->ioda.pe_array[i];
+               pe->phb = phb;
+               pe->pe_number = i;
+
+               if (!master_pe) {
+                       pe->flags |= PNV_IODA_PE_MASTER;
+                       INIT_LIST_HEAD(&pe->slaves);
+                       master_pe = pe;
+               } else {
+                       pe->flags |= PNV_IODA_PE_SLAVE;
+                       pe->master = master_pe;
+                       list_add_tail(&pe->list, &master_pe->slaves);
+               }
+       }
+
+       kfree(pe_alloc);
+       return master_pe->pe_number;
+}
+
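+/*
+ * Parse the "ibm,opal-m64-window" property, record the M64 window
+ * geometry (base, size, per-PE segment size) in the PHB and hook
+ * up the M64 callbacks used while setting up PEs.
+ */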
+static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
+{
+       struct pci_controller *hose = phb->hose;
+       struct device_node *dn = hose->dn;
+       struct resource *res;
+       const u32 *r;
+       u64 pci_addr;
+
+       if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
+               pr_info("  Firmware too old to support M64 window\n");
+               return;
+       }
+
+       r = of_get_property(dn, "ibm,opal-m64-window", NULL);
+       if (!r) {
+               pr_info("  No <ibm,opal-m64-window> on %s\n",
+                       dn->full_name);
+               return;
+       }
+
+       /* FIXME: Support M64 for P7IOC */
+       if (phb->type != PNV_PHB_IODA2) {
+               pr_info("  M64 window not supported\n");
+               return;
+       }
+
+       res = &hose->mem_resources[1];
+       res->start = of_translate_address(dn, r + 2);
+       res->end = res->start + of_read_number(r + 4, 2) - 1;
+       res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
+       pci_addr = of_read_number(r, 2);
+       hose->mem_offset[1] = res->start - pci_addr;
+
+       phb->ioda.m64_size = resource_size(res);
+       phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe;
+       phb->ioda.m64_base = pci_addr;
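+
+       /*
+        * Illustration (hypothetical numbers): a 64GB M64 window
+        * shared by 256 PEs yields 256MB segments, with PE# n
+        * owning the n-th segment above m64_base.
+        */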
+
+       /* Use last M64 BAR to cover M64 window */
+       phb->ioda.m64_bar_idx = 15;
+       phb->init_m64 = pnv_ioda2_init_m64;
+       phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe;
+       phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
+}
+
 /* Currently those 2 are only used when MSIs are enabled, this will change
  * but in the meantime, we need to protect them to avoid warnings
  */
        struct pci_controller *hose = pci_bus_to_host(bus);
        struct pnv_phb *phb = hose->private_data;
        struct pnv_ioda_pe *pe;
-       int pe_num;
+       int pe_num = IODA_INVALID_PE;
+
+       /* Check if PE is determined by M64 */
+       if (phb->pick_m64_pe)
+               pe_num = phb->pick_m64_pe(phb, bus, all);
+
+       /* The PE number isn't pinned by M64 */
+       if (pe_num == IODA_INVALID_PE)
+               pe_num = pnv_ioda_alloc_pe(phb);
 
-       pe_num = pnv_ioda_alloc_pe(phb);
        if (pe_num == IODA_INVALID_PE) {
                pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
                        __func__, pci_domain_nr(bus), bus->number);
        }
 
        pe = &phb->ioda.pe_array[pe_num];
-       pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
+       pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
        pe->pbus = bus;
        pe->pdev = NULL;
        pe->tce32_seg = -1;
 static void pnv_pci_ioda_setup_PEs(void)
 {
        struct pci_controller *hose, *tmp;
+       struct pnv_phb *phb;
 
        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+               phb = hose->private_data;
+
+               /* M64 layout might affect PE allocation */
+               if (phb->alloc_m64_pe)
+                       phb->alloc_m64_pe(phb);
+
                pnv_ioda_setup_PEs(hose->bus);
        }
 }
                                index++;
                        }
                } else if (res->flags & IORESOURCE_MEM) {
-                       /* WARNING: Assumes M32 is mem region 0 in PHB. We need to
-                        * harden that algorithm when we start supporting M64
-                        */
                        region.start = res->start -
                                       hose->mem_offset[0] -
                                       phb->ioda.m32_pci_base;
                bridge = bridge->bus->self;
        }
 
-       /* We need support prefetchable memory window later */
+       /* We fall back to M32 if M64 isn't supported */
+       if (phb->ioda.m64_segsize &&
+           pnv_pci_is_mem_pref_64(type))
+               return phb->ioda.m64_segsize;
        if (type & IORESOURCE_MEM)
                return phb->ioda.m32_segsize;
 
        prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
        if (prop32)
                phb->ioda.reserved_pe = be32_to_cpup(prop32);
+
+       /* Parse 64-bit MMIO range */
+       pnv_ioda_parse_m64_window(phb);
+
        phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
        /* FW Has already off top 64k of M32 space (MSI space) */
        phb->ioda.m32_size += 0x10000;
        /* Calculate how many 32-bit TCE segments we have */
        phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
 
-       /* Clear unusable m64 */
-       hose->mem_resources[1].flags = 0;
-       hose->mem_resources[1].start = 0;
-       hose->mem_resources[1].end = 0;
-       hose->mem_resources[2].flags = 0;
-       hose->mem_resources[2].start = 0;
-       hose->mem_resources[2].end = 0;
-
 #if 0 /* We should really do that ... */
        rc = opal_pci_set_phb_mem_window(opal->phb_id,
                                         window_type,
                                         segment_size);
 #endif
 
-       pr_info("  %d (%d) PE's M32: 0x%x [segment=0x%x]"
-               " IO: 0x%x [segment=0x%x]\n",
-               phb->ioda.total_pe,
-               phb->ioda.reserved_pe,
-               phb->ioda.m32_size, phb->ioda.m32_segsize,
-               phb->ioda.io_size, phb->ioda.io_segsize);
+       pr_info("  %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
+               phb->ioda.total_pe, phb->ioda.reserved_pe,
+               phb->ioda.m32_size, phb->ioda.m32_segsize);
+       if (phb->ioda.m64_size)
+               pr_info("                 M64: 0x%lx [segment=0x%lx]\n",
+                       phb->ioda.m64_size, phb->ioda.m64_segsize);
+       if (phb->ioda.io_size)
+               pr_info("                  IO: 0x%x [segment=0x%x]\n",
+                       phb->ioda.io_size, phb->ioda.io_segsize);
 
        phb->hose->ops = &pnv_pci_ops;
 #ifdef CONFIG_EEH
                ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
                ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET);
        }
+
+       /* Configure M64 window */
+       if (phb->init_m64 && phb->init_m64(phb))
+               hose->mem_resources[1].flags = 0;
 }
 
 void __init pnv_pci_init_ioda2_phb(struct device_node *np)