#define BOOK3S_HFLAG_SLB                       0x2
 #define BOOK3S_HFLAG_PAIRED_SINGLE             0x4
 #define BOOK3S_HFLAG_NATIVE_PS                 0x8
+#define BOOK3S_HFLAG_MULTI_PGSIZE              0x10
+#define BOOK3S_HFLAG_NEW_TLBIE                 0x20
 
 #define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
 
        struct hlist_node list_pte_long;
        struct hlist_node list_vpte;
        struct hlist_node list_vpte_long;
+#ifdef CONFIG_PPC_BOOK3S_64
+       struct hlist_node list_vpte_64k;
+#endif
        struct rcu_head rcu_head;
        u64 host_vpn;
        u64 pfn;
        struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
        struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
        struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG];
+#ifdef CONFIG_PPC_BOOK3S_64
+       struct hlist_head hpte_hash_vpte_64k[HPTEG_HASH_NUM_VPTE_64K];
+#endif
        int hpte_cache_count;
        spinlock_t mmu_lock;
 };
 
 #define HPTEG_HASH_BITS_PTE_LONG       12
 #define HPTEG_HASH_BITS_VPTE           13
 #define HPTEG_HASH_BITS_VPTE_LONG      5
+#define HPTEG_HASH_BITS_VPTE_64K       11
 #define HPTEG_HASH_NUM_PTE             (1 << HPTEG_HASH_BITS_PTE)
 #define HPTEG_HASH_NUM_PTE_LONG                (1 << HPTEG_HASH_BITS_PTE_LONG)
 #define HPTEG_HASH_NUM_VPTE            (1 << HPTEG_HASH_BITS_VPTE)
 #define HPTEG_HASH_NUM_VPTE_LONG       (1 << HPTEG_HASH_BITS_VPTE_LONG)
+#define HPTEG_HASH_NUM_VPTE_64K                (1 << HPTEG_HASH_BITS_VPTE_64K)
 
 /* Physical Address Mask - allowed range of real mode RAM access */
 #define KVM_PAM                        0x0fffffffffffffffULL
        bool may_read           : 1;
        bool may_write          : 1;
        bool may_execute        : 1;
+       u8 page_size;           /* MMU_PAGE_xxx */
 };
 
 struct kvmppc_mmu {
        bool large      : 1;    /* PTEs are 16MB */
        bool tb         : 1;    /* 1TB segment */
        bool class      : 1;
+       u8 base_page_size;      /* MMU_PAGE_xxx */
 };
 
 # ifdef CONFIG_PPC_FSL_BOOK3E
 
        return kvmppc_slb_calc_vpn(slb, eaddr);
 }
 
+static int mmu_pagesize(int mmu_pg)
+{
+       switch (mmu_pg) {
+       case MMU_PAGE_64K:
+               return 16;
+       case MMU_PAGE_16M:
+               return 24;
+       }
+       return 12;
+}
+
 static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
 {
-       return slbe->large ? 24 : 12;
+       return mmu_pagesize(slbe->base_page_size);
 }
 
 static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
        avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
        avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p);
 
-       if (p < 24)
-               avpn >>= ((80 - p) - 56) - 8;
+       if (p < 16)
+               avpn >>= ((80 - p) - 56) - 8;   /* 16 - p */
        else
-               avpn <<= 8;
+               avpn <<= p - 16;
 
        return avpn;
 }
 
+/*
+ * Return page size encoded in the second word of a HPTE, or
+ * -1 for an invalid encoding for the base page size indicated by
+ * the SLB entry.  This doesn't handle mixed pagesize segments yet.
+ */
+static int decode_pagesize(struct kvmppc_slb *slbe, u64 r)
+{
+       switch (slbe->base_page_size) {
+       case MMU_PAGE_64K:
+               if ((r & 0xf000) == 0x1000)
+                       return MMU_PAGE_64K;
+               break;
+       case MMU_PAGE_16M:
+               if ((r & 0xff000) == 0)
+                       return MMU_PAGE_16M;
+               break;
+       }
+       return -1;
+}
+
 static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
                                struct kvmppc_pte *gpte, bool data)
 {
        u8 pp, key = 0;
        bool found = false;
        bool second = false;
+       int pgsize;
        ulong mp_ea = vcpu->arch.magic_page_ea;
 
        /* Magic page override */
                gpte->may_execute = true;
                gpte->may_read = true;
                gpte->may_write = true;
+               gpte->page_size = MMU_PAGE_4K;
 
                return 0;
        }
        v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
                HPTE_V_SECONDARY;
 
+       pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K;
+
 do_second:
        ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
        if (kvm_is_error_hva(ptegp))
        for (i=0; i<16; i+=2) {
                /* Check all relevant fields of 1st dword */
                if ((pteg[i] & v_mask) == v_val) {
+                       /* If large page bit is set, check pgsize encoding */
+                       if (slbe->large &&
+                           (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+                               pgsize = decode_pagesize(slbe, pteg[i+1]);
+                               if (pgsize < 0)
+                                       continue;
+                       }
                        found = true;
                        break;
                }
        v = pteg[i];
        r = pteg[i+1];
        pp = (r & HPTE_R_PP) | key;
-       eaddr_mask = 0xFFF;
 
        gpte->eaddr = eaddr;
        gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
-       if (slbe->large)
-               eaddr_mask = 0xFFFFFF;
+
+       eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1;
        gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
+       gpte->page_size = pgsize;
        gpte->may_execute = ((r & HPTE_R_N) ? false : true);
        gpte->may_read = false;
        gpte->may_write = false;
        slbe->nx    = (rs & SLB_VSID_N) ? 1 : 0;
        slbe->class = (rs & SLB_VSID_C) ? 1 : 0;
 
+       slbe->base_page_size = MMU_PAGE_4K;
+       if (slbe->large) {
+               if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) {
+                       switch (rs & SLB_VSID_LP) {
+                       case SLB_VSID_LP_00:
+                               slbe->base_page_size = MMU_PAGE_16M;
+                               break;
+                       case SLB_VSID_LP_01:
+                               slbe->base_page_size = MMU_PAGE_64K;
+                               break;
+                       }
+               } else
+                       slbe->base_page_size = MMU_PAGE_16M;
+       }
+
        slbe->orige = rb & (ESID_MASK | SLB_ESID_V);
        slbe->origv = rs;
 
 
        dprintk("KVM MMU: tlbie(0x%lx)\n", va);
 
-       if (large)
-               mask = 0xFFFFFF000ULL;
+       /*
+        * The tlbie instruction changed behaviour starting with
+        * POWER6.  POWER6 and later don't have the large page flag
+        * in the instruction but in the RB value, along with bits
+        * indicating page and segment sizes.
+        */
+       if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) {
+               /* POWER6 or later */
+               if (va & 1) {           /* L bit */
+                       if ((va & 0xf000) == 0x1000)
+                               mask = 0xFFFFFFFF0ULL;  /* 64k page */
+                       else
+                               mask = 0xFFFFFF000ULL;  /* 16M page */
+               }
+       } else {
+               /* older processors, e.g. PPC970 */
+               if (large)
+                       mask = 0xFFFFFF000ULL;
+       }
        kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask);
 }
 
 
                       HPTEG_HASH_BITS_VPTE_LONG);
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline u64 kvmppc_mmu_hash_vpte_64k(u64 vpage)
+{
+       return hash_64((vpage & 0xffffffff0ULL) >> 4,
+                      HPTEG_HASH_BITS_VPTE_64K);
+}
+#endif
+
 void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
        u64 index;
        hlist_add_head_rcu(&pte->list_vpte_long,
                           &vcpu3s->hpte_hash_vpte_long[index]);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+       /* Add to vPTE_64k list */
+       index = kvmppc_mmu_hash_vpte_64k(pte->pte.vpage);
+       hlist_add_head_rcu(&pte->list_vpte_64k,
+                          &vcpu3s->hpte_hash_vpte_64k[index]);
+#endif
+
        spin_unlock(&vcpu3s->mmu_lock);
 }
 
        hlist_del_init_rcu(&pte->list_pte_long);
        hlist_del_init_rcu(&pte->list_vpte);
        hlist_del_init_rcu(&pte->list_vpte_long);
+#ifdef CONFIG_PPC_BOOK3S_64
+       hlist_del_init_rcu(&pte->list_vpte_64k);
+#endif
 
        spin_unlock(&vcpu3s->mmu_lock);
 
        rcu_read_unlock();
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Flush with mask 0xffffffff0 */
+static void kvmppc_mmu_pte_vflush_64k(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+       struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+       struct hlist_head *list;
+       struct hpte_cache *pte;
+       u64 vp_mask = 0xffffffff0ULL;
+
+       list = &vcpu3s->hpte_hash_vpte_64k[
+               kvmppc_mmu_hash_vpte_64k(guest_vp)];
+
+       rcu_read_lock();
+
+       /* Check the list for matching entries and invalidate */
+       hlist_for_each_entry_rcu(pte, list, list_vpte_64k)
+               if ((pte->pte.vpage & vp_mask) == guest_vp)
+                       invalidate_pte(vcpu, pte);
+
+       rcu_read_unlock();
+}
+#endif
+
 /* Flush with mask 0xffffff000 */
 static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
 {
        case 0xfffffffffULL:
                kvmppc_mmu_pte_vflush_short(vcpu, guest_vp);
                break;
+#ifdef CONFIG_PPC_BOOK3S_64
+       case 0xffffffff0ULL:
+               kvmppc_mmu_pte_vflush_64k(vcpu, guest_vp);
+               break;
+#endif
        case 0xffffff000ULL:
                kvmppc_mmu_pte_vflush_long(vcpu, guest_vp);
                break;
                                  ARRAY_SIZE(vcpu3s->hpte_hash_vpte));
        kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long,
                                  ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long));
+#ifdef CONFIG_PPC_BOOK3S_64
+       kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_64k,
+                                 ARRAY_SIZE(vcpu3s->hpte_hash_vpte_64k));
+#endif
 
        spin_lock_init(&vcpu3s->mmu_lock);
 
 
        if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
                to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
 
+       /*
+        * If they're asking for POWER6 or later, set the flag
+        * indicating that we can do multiple large page sizes
+        * and 1TB segments.
+        * Also set the flag that indicates that tlbie has the large
+        * page bit in the RB operand instead of the instruction.
+        */
+       switch (PVR_VER(pvr)) {
+       case PVR_POWER6:
+       case PVR_POWER7:
+       case PVR_POWER7p:
+       case PVR_POWER8:
+               vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
+                       BOOK3S_HFLAG_NEW_TLBIE;
+               break;
+       }
+
 #ifdef CONFIG_PPC_BOOK3S_32
        /* 32 bit Book3S always has 32 byte dcbz */
        vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
        vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
 
 #ifdef CONFIG_PPC_BOOK3S_64
-       /* default to book3s_64 (970fx) */
+       /*
+        * Default to the same as the host if we're on sufficiently
+        * recent machine that we have 1TB segments;
+        * otherwise default to PPC970FX.
+        */
        vcpu->arch.pvr = 0x3C0301;
+       if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+               vcpu->arch.pvr = mfspr(SPRN_PVR);
 #else
        /* default to book3s_32 (750) */
        vcpu->arch.pvr = 0x84202;
 #ifdef CONFIG_PPC64
 int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
 {
-       info->flags = KVM_PPC_1T_SEGMENTS;
+       long int i;
+       struct kvm_vcpu *vcpu;
+
+       info->flags = 0;
 
        /* SLB is always 64 entries */
        info->slb_size = 64;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;
 
+       /*
+        * 64k large page size.
+        * We only want to put this in if the CPUs we're emulating
+        * support it, but unfortunately we don't have a vcpu easily
+        * to hand here to test.  Just pick the first vcpu, and if
+        * that doesn't exist yet, report the minimum capability,
+        * i.e., no 64k pages.
+        * 1T segment support goes along with 64k pages.
+        */
+       i = 1;
+       vcpu = kvm_get_vcpu(kvm, 0);
+       if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+               info->flags = KVM_PPC_1T_SEGMENTS;
+               info->sps[i].page_shift = 16;
+               info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
+               info->sps[i].enc[0].page_shift = 16;
+               info->sps[i].enc[0].pte_enc = 1;
+               ++i;
+       }
+
        /* Standard 16M large page size segment */
-       info->sps[1].page_shift = 24;
-       info->sps[1].slb_enc = SLB_VSID_L;
-       info->sps[1].enc[0].page_shift = 24;
-       info->sps[1].enc[0].pte_enc = 0;
+       info->sps[i].page_shift = 24;
+       info->sps[i].slb_enc = SLB_VSID_L;
+       info->sps[i].enc[0].page_shift = 24;
+       info->sps[i].enc[0].pte_enc = 0;
 
        return 0;
 }