 #define ID0_NTS                                (1 << 28)
 #define ID0_SMS                                (1 << 27)
 #define ID0_ATOSNS                     (1 << 26)
+#define ID0_PTFS_NO_AARCH32            (1 << 25)
+#define ID0_PTFS_NO_AARCH32S           (1 << 24)
 #define ID0_CTTW                       (1 << 14)
 #define ID0_NUMIRPT_SHIFT              16
 #define ID0_NUMIRPT_MASK               0xff
 #define ARM_SMMU_FEAT_TRANS_NESTED     (1 << 4)
 #define ARM_SMMU_FEAT_TRANS_OPS                (1 << 5)
 #define ARM_SMMU_FEAT_VMID16           (1 << 6)
+#define ARM_SMMU_FEAT_FMT_AARCH64_4K   (1 << 7)
+#define ARM_SMMU_FEAT_FMT_AARCH64_16K  (1 << 8)
+#define ARM_SMMU_FEAT_FMT_AARCH64_64K  (1 << 9)
+#define ARM_SMMU_FEAT_FMT_AARCH32_L    (1 << 10)
+#define ARM_SMMU_FEAT_FMT_AARCH32_S    (1 << 11)
        u32                             features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
        u32                             cavium_id_base; /* Specific to Cavium */
 };
 
+enum arm_smmu_context_fmt {
+       ARM_SMMU_CTX_FMT_NONE,
+       ARM_SMMU_CTX_FMT_AARCH64,
+       ARM_SMMU_CTX_FMT_AARCH32_L,
+       ARM_SMMU_CTX_FMT_AARCH32_S,
+};
+
 struct arm_smmu_cfg {
        u8                              cbndx;
        u8                              irptndx;
        u32                             cbar;
+       enum arm_smmu_context_fmt       fmt;
 };
 #define INVALID_IRPTNDX                        0xff
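
A side note on the new field: ARM_SMMU_CTX_FMT_NONE is deliberately the
enum's first (zero) value, so a freshly zero-initialised arm_smmu_cfg
reads as "no format chosen yet", which is what the selection logic
further down keys off. A stand-alone illustration (a sketch, not driver
code; the struct is a hypothetical stand-in trimmed to the fields shown
here):

    #include <assert.h>
    #include <string.h>

    enum arm_smmu_context_fmt {
            ARM_SMMU_CTX_FMT_NONE,          /* 0 == "not yet chosen" */
            ARM_SMMU_CTX_FMT_AARCH64,
            ARM_SMMU_CTX_FMT_AARCH32_L,
            ARM_SMMU_CTX_FMT_AARCH32_S,
    };

    struct cfg_sketch {                     /* hypothetical stand-in */
            unsigned char cbndx, irptndx;
            unsigned int cbar;
            enum arm_smmu_context_fmt fmt;
    };

    int main(void)
    {
            struct cfg_sketch cfg;

            memset(&cfg, 0, sizeof(cfg));   /* what kzalloc() gives the driver */
            assert(cfg.fmt == ARM_SMMU_CTX_FMT_NONE);
            return 0;
    }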
 
                reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
                reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
 
-               if (!IS_ENABLED(CONFIG_64BIT) || smmu->version == ARM_SMMU_V1) {
+               if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
                        iova &= ~12UL;
                        iova |= ARM_SMMU_CB_ASID(smmu, cfg);
                        do {
                                writel_relaxed(iova, reg);
                                iova += granule;
                        } while (size -= granule);
-#ifdef CONFIG_64BIT
                } else {
                        iova >>= 12;
                        iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48;
                        do {
                                writeq_relaxed(iova, reg);
                                iova += granule >> 12;
                        } while (size -= granule);
-#endif
                }
-#ifdef CONFIG_64BIT
        } else if (smmu->version == ARM_SMMU_V2) {
                reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
                reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
                              ARM_SMMU_CB_S2_TLBIIPAS2;
                iova >>= 12;
                do {
                        smmu_write_atomic_lq(iova, reg);
                        iova += granule >> 12;
                } while (size -= granule);
-#endif
        } else {
                reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
                writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg);
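
For reference, the two TLBIVA operand encodings that the stage-1 hunk
distinguishes, restated as a stand-alone sketch (user-space C, invented
names; the packing mirrors what the driver writes). One caveat: the
pre-existing `iova &= ~12UL;` in the 32-bit path looks like it was meant
to clear the low 12 bits, i.e. ~0xfffUL, so the sketch assumes the
apparent intent:

    #include <stdint.h>
    #include <stdio.h>

    /* AArch32 formats: 32-bit register, VA[31:12] plus ASID in [7:0] */
    static uint32_t tlbiva_aarch32(uint32_t iova, uint8_t asid)
    {
            return (iova & ~0xfffu) | asid;
    }

    /* AArch64 formats: 64-bit register, VA >> 12 with ASID at [63:48] */
    static uint64_t tlbiva_aarch64(uint64_t iova, uint16_t asid)
    {
            return (iova >> 12) | ((uint64_t)asid << 48);
    }

    int main(void)
    {
            printf("0x%08x\n", tlbiva_aarch32(0x12345678, 0x2a));
            /* -> 0x1234502a */
            printf("0x%016llx\n",
                   (unsigned long long)tlbiva_aarch64(0x12345678000ULL, 0x2a));
            /* -> 0x002a000012345678 */
            return 0;
    }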
        cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
 
        if (smmu->version > ARM_SMMU_V1) {
-#ifdef CONFIG_64BIT
-               reg = CBA2R_RW64_64BIT;
-#else
-               reg = CBA2R_RW64_32BIT;
-#endif
+               if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
+                       reg = CBA2R_RW64_64BIT;
+               else
+                       reg = CBA2R_RW64_32BIT;
                /* 16-bit VMIDs live in CBA2R */
                if (smmu->features & ARM_SMMU_FEAT_VMID16)
                        reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT;
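
The CBA2R value being assembled above, as a stand-alone sketch
(constants are placeholders shadowing the driver's names; bit 0 selects
the AArch64 context format, and with VMID16 the 16-bit VMID sits in
bits [31:16]):

    #include <stdint.h>
    #include <stdio.h>

    #define CBA2R_RW64_32BIT        (0 << 0)
    #define CBA2R_RW64_64BIT        (1 << 0)
    #define CBA2R_VMID_SHIFT        16

    static uint32_t cba2r_value(int aarch64_fmt, int vmid16, uint16_t vmid)
    {
            uint32_t reg = aarch64_fmt ? CBA2R_RW64_64BIT : CBA2R_RW64_32BIT;

            if (vmid16)     /* 16-bit VMIDs live in CBA2R */
                    reg |= (uint32_t)vmid << CBA2R_VMID_SHIFT;
            return reg;
    }

    int main(void)
    {
            printf("0x%08x\n", cba2r_value(1, 1, 0xff));  /* -> 0x00ff0001 */
            return 0;
    }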
        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
                smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
 
+       /*
+        * Choosing a suitable context format is even more fiddly. Until we
+        * grow some way for the caller to express a preference, and/or move
+        * the decision into the io-pgtable code where it arguably belongs,
+        * just aim for the closest thing to the rest of the system, and hope
+        * that the hardware isn't esoteric enough that we can't assume AArch64
+        * support to be a superset of AArch32 support...
+        */
+       if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
+               cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
+       if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
+           (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
+                              ARM_SMMU_FEAT_FMT_AARCH64_16K |
+                              ARM_SMMU_FEAT_FMT_AARCH64_4K)))
+               cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
+
+       if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
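
The policy in that block reads more easily as a pure function. A sketch
(not the driver's code) with a few worked cases:

    #include <stdbool.h>
    #include <stdio.h>

    enum fmt { FMT_NONE, FMT_AARCH64, FMT_AARCH32_L };

    static enum fmt pick_fmt(bool kernel_64bit, bool has_aarch32_l,
                             bool has_aarch64_granule)
    {
            enum fmt fmt = FMT_NONE;

            if (has_aarch32_l)
                    fmt = FMT_AARCH32_L;
            /* Upgrade when the kernel is 64-bit, or when AArch32-L
             * wasn't available in the first place. */
            if ((kernel_64bit || fmt == FMT_NONE) && has_aarch64_granule)
                    fmt = FMT_AARCH64;
            return fmt;     /* FMT_NONE corresponds to -EINVAL above */
    }

    int main(void)
    {
            printf("%d\n", pick_fmt(true, true, true));    /* FMT_AARCH64   */
            printf("%d\n", pick_fmt(false, true, true));   /* FMT_AARCH32_L */
            printf("%d\n", pick_fmt(false, false, false)); /* FMT_NONE      */
            return 0;
    }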
+
        switch (smmu_domain->stage) {
        case ARM_SMMU_DOMAIN_S1:
                cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
                start = smmu->num_s2_context_banks;
                ias = smmu->va_size;
                oas = smmu->ipa_size;
-               if (IS_ENABLED(CONFIG_64BIT))
+               if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
                        fmt = ARM_64_LPAE_S1;
-               else
+               } else {
                        fmt = ARM_32_LPAE_S1;
+                       ias = min(ias, 32UL);
+                       oas = min(oas, 40UL);
+               }
                break;
        case ARM_SMMU_DOMAIN_NESTED:
                /*
                 * We will likely want to change this if/when KVM gets
                 * involved.
                 */
        case ARM_SMMU_DOMAIN_S2:
                cfg->cbar = CBAR_TYPE_S2_TRANS;
                start = 0;
                ias = smmu->ipa_size;
                oas = smmu->pa_size;
-               if (IS_ENABLED(CONFIG_64BIT))
+               if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
                        fmt = ARM_64_LPAE_S2;
-               else
+               } else {
                        fmt = ARM_32_LPAE_S2;
+                       ias = min(ias, 40UL);
+                       oas = min(oas, 40UL);
+               }
                break;
        default:
                ret = -EINVAL;
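
The min() clamps in the two cases above encode the AArch32 LPAE limits:
at most a 32-bit input range at stage 1, a 40-bit input range at
stage 2, and a 40-bit output range at both. A toy example, assuming
hardware that probed 48 bits everywhere:

    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
            unsigned long ias = 48, oas = 48;  /* hypothetical probe results */

            printf("S1 AArch32: ias=%lu oas=%lu\n",
                   MIN(ias, 32UL), MIN(oas, 40UL));   /* -> 32/40 */
            printf("S2 AArch32: ias=%lu oas=%lu\n",
                   MIN(ias, 40UL), MIN(oas, 40UL));   /* -> 40/40 */
            return 0;
    }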
                                           ID0_NUMSIDB_MASK;
        }
 
+       if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
+               smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
+               if (!(id & ID0_PTFS_NO_AARCH32S))
+                       smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
+       }
+
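
Restating the ID0 decode just added: SMMUv1 predates the PTFS_NO_* ID
bits, so AArch32-L support is simply assumed there; on SMMUv2 the bits
advertise the *absence* of a format, hence the inverted tests. A sketch
with the relevant constants copied from above:

    #include <stdint.h>
    #include <stdio.h>

    #define ID0_PTFS_NO_AARCH32     (1u << 25)
    #define ID0_PTFS_NO_AARCH32S    (1u << 24)
    #define FEAT_FMT_AARCH32_L      (1u << 10)
    #define FEAT_FMT_AARCH32_S      (1u << 11)

    static uint32_t aarch32_fmt_features(int is_v2, uint32_t id0)
    {
            uint32_t features = 0;

            if (!is_v2 || !(id0 & ID0_PTFS_NO_AARCH32)) {
                    features |= FEAT_FMT_AARCH32_L;
                    if (!(id0 & ID0_PTFS_NO_AARCH32S))
                            features |= FEAT_FMT_AARCH32_S;
            }
            return features;
    }

    int main(void)
    {
            /* e.g. an SMMUv2 whose ID0 sets only PTFS_NO_AARCH32S */
            printf("0x%08x\n", aarch32_fmt_features(1, ID0_PTFS_NO_AARCH32S));
            /* -> 0x00000400, i.e. AArch32-L but not AArch32-S */
            return 0;
    }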
        /* ID1 */
        id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
        smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
 
        if (smmu->version == ARM_SMMU_V1) {
                smmu->va_size = smmu->ipa_size;
-               size = SZ_4K | SZ_2M | SZ_1G;
        } else {
                size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
                smmu->va_size = arm_smmu_id_size_to_bits(size);
-#ifndef CONFIG_64BIT
-               smmu->va_size = min(32UL, smmu->va_size);
-#endif
-               size = 0;
                if (id & ID2_PTFS_4K)
-                       size |= SZ_4K | SZ_2M | SZ_1G;
+                       smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
                if (id & ID2_PTFS_16K)
-                       size |= SZ_16K | SZ_32M;
+                       smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
                if (id & ID2_PTFS_64K)
-                       size |= SZ_64K | SZ_512M;
+                       smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
        }
 
+       /* Now we've corralled the various formats, what'll it do? */
+       size = 0;
+       if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
+               size |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
+       if (smmu->features &
+           (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
+               size |= SZ_4K | SZ_2M | SZ_1G;
+       if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
+               size |= SZ_16K | SZ_32M;
+       if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
+               size |= SZ_64K | SZ_512M;
+
        arm_smmu_ops.pgsize_bitmap &= size;
        dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n", size);
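
And a worked example of the final bitmap assembly, for one hypothetical
configuration (AArch64 4K and 64K granules, no AArch32 short-descriptor
support):

    #include <stdio.h>

    #define SZ_4K   0x00001000UL
    #define SZ_64K  0x00010000UL
    #define SZ_2M   0x00200000UL
    #define SZ_512M 0x20000000UL
    #define SZ_1G   0x40000000UL

    int main(void)
    {
            unsigned long size = 0;

            size |= SZ_4K | SZ_2M | SZ_1G;  /* FMT_AARCH64_4K (or AARCH32_L) */
            size |= SZ_64K | SZ_512M;       /* FMT_AARCH64_64K               */
            printf("Supported page sizes: 0x%08lx\n", size);
            /* -> 0x60211000 */
            return 0;
    }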