#define Q_BASE_RWA                     (1UL << 62)
 #define Q_BASE_ADDR_MASK               GENMASK_ULL(51, 5)
 #define Q_BASE_LOG2SIZE                        GENMASK(4, 0)
+#define Q_MAX_SZ_SHIFT                 (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT) /* log2 of the largest queue size in bytes; per-queue entry caps are derived from it */
 
 /*
  * Stream table.
                                        FIELD_GET(ARM64_TCR_##fld, tcr))
 
 /* Command queue */
-#define CMDQ_ENT_DWORDS                        2
-#define CMDQ_MAX_SZ_SHIFT              8
+#define CMDQ_ENT_SZ_SHIFT              4 /* log2 of one command entry's size in bytes */
+#define CMDQ_ENT_DWORDS                        ((1 << CMDQ_ENT_SZ_SHIFT) >> 3) /* entry size in 8-byte words */
+#define CMDQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT) /* log2 cap on the number of command entries */
 
 #define CMDQ_CONS_ERR                  GENMASK(30, 24)
 #define CMDQ_ERR_CERROR_NONE_IDX       0
 #define CMDQ_SYNC_1_MSIADDR_MASK       GENMASK_ULL(51, 2)
 
 /* Event queue */
-#define EVTQ_ENT_DWORDS                        4
-#define EVTQ_MAX_SZ_SHIFT              7
+#define EVTQ_ENT_SZ_SHIFT              5 /* log2 of one event entry's size in bytes */
+#define EVTQ_ENT_DWORDS                        ((1 << EVTQ_ENT_SZ_SHIFT) >> 3) /* entry size in 8-byte words */
+#define EVTQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT) /* log2 of the entry count that fits in 1 << Q_MAX_SZ_SHIFT bytes */
 
 #define EVTQ_0_ID                      GENMASK_ULL(7, 0)
 
 /* PRI queue */
-#define PRIQ_ENT_DWORDS                        2
-#define PRIQ_MAX_SZ_SHIFT              8
+#define PRIQ_ENT_SZ_SHIFT              4 /* log2 of one PRI entry's size in bytes */
+#define PRIQ_ENT_DWORDS                        ((1 << PRIQ_ENT_SZ_SHIFT) >> 3) /* entry size in 8-byte words */
+#define PRIQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT) /* log2 of the entry count that fits in 1 << Q_MAX_SZ_SHIFT bytes */
 
 #define PRIQ_0_SID                     GENMASK_ULL(31, 0)
 #define PRIQ_0_SSID                    GENMASK_ULL(51, 32)
 /* High-level queue accessors */
 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 {
-       memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
+       memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
        cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
 
        switch (ent->opcode) {
                                   struct arm_smmu_queue *q,
                                   unsigned long prod_off,
                                   unsigned long cons_off,
-                                  size_t dwords)
+                                  size_t dwords, const char *name)
 {
-       size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
+       size_t qsz;
+
+       do {
+               qsz = ((1 << q->max_n_shift) * dwords) << 3;
+               q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
+                                             GFP_KERNEL);
+               if (q->base || qsz < PAGE_SIZE)
+                       break;
+
+               q->max_n_shift--;
+       } while (1);
 
-       q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
        if (!q->base) {
-               dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
-                       qsz);
+               dev_err(smmu->dev,
+                       "failed to allocate queue (0x%zx bytes) for %s\n",
+                       qsz, name);
                return -ENOMEM;
        }
 
+       if (!WARN_ON(q->base_dma & (qsz - 1))) {
+               dev_info(smmu->dev, "allocated %u entries for %s\n",
+                        1 << q->max_n_shift, name);
+       }
+
        q->prod_reg     = arm_smmu_page1_fixup(prod_off, smmu);
        q->cons_reg     = arm_smmu_page1_fixup(cons_off, smmu);
        q->ent_dwords   = dwords;
        /* cmdq */
        spin_lock_init(&smmu->cmdq.lock);
        ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
-                                     ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
+                                     ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
+                                     "cmdq");
        if (ret)
                return ret;
 
        /* evtq */
        ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
-                                     ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
+                                     ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
+                                     "evtq");
        if (ret)
                return ret;
 
                return 0;
 
        return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
-                                      ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
+                                      ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
+                                      "priq");
 }
 
 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
                return -ENXIO;
        }
 
-       /* Queue sizes, capped at 4k */
+       /* Queue sizes, capped to ensure natural alignment */
        smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
                                         FIELD_GET(IDR1_CMDQS, reg));
        if (!smmu->cmdq.q.max_n_shift) {