iommu/io-pgtable-arm: Add read_and_clear_dirty() support
Author:     Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
AuthorDate: Wed, 3 Jul 2024 10:16:02 +0000 (11:16 +0100)
Commit:     Will Deacon <will@kernel.org>
CommitDate: Wed, 3 Jul 2024 14:45:47 +0000 (15:45 +0100)
The .read_and_clear_dirty() IOMMU domain op takes care of reading the
dirty bits (i.e. the PTE has DBM set and AP[2] clear) and marshalling
them into a bitmap of a given page size.

While reading the dirty bits, we also set the PTE AP[2] bit to mark the
entry as writeable-clean, unless the caller passed IOMMU_DIRTY_NO_CLEAR
in the read_and_clear_dirty() flags.
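A minimal standalone model of that clear-on-read behaviour. The
IOMMU_DIRTY_NO_CLEAR flag name and the bit positions (DBM at bit 51,
AP[2] at bit 7) come from the kernel and from the defines added in this
patch; the harvest_pte() helper and the rest are purely illustrative:

#include <stdbool.h>
#include <stdint.h>

#define DBM        (UINT64_C(1) << 51)  /* Dirty Bit Modifier */
#define AP_RDONLY  (UINT64_C(1) << 7)   /* AP[2]: 1 = write disabled */

#define IOMMU_DIRTY_NO_CLEAR 1          /* report only, don't re-protect */

/* Returns true if the PTE was dirty; re-arms AP[2] unless asked not to. */
static bool harvest_pte(uint64_t *ptep, unsigned long flags)
{
	bool dirty = (*ptep & (DBM | AP_RDONLY)) == DBM;

	if (dirty && !(flags & IOMMU_DIRTY_NO_CLEAR))
		*ptep |= AP_RDONLY;     /* back to writeable-clean */
	return dirty;
}

int main(void)
{
	uint64_t pte = DBM;                       /* writeable-dirty */
	bool was_dirty = harvest_pte(&pte, 0);    /* reads and clears */

	return !(was_dirty && (pte & AP_RDONLY)); /* 0 = behaved as expected */
}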

PTE states with respect to DBM bit:

                       DBM bit        AP[2] ("RDONLY" bit)
1. writable_clean        1                 1
2. writable_dirty        1                 0
3. read-only             0                 1
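As a quick sanity check of the table, here is a standalone sketch using
the same predicate this patch encodes in iopte_writeable_dirty(): only
state 2 (DBM=1, AP[2]=0) reads back as dirty. Bit positions are taken
from the patch; the short macro names are illustrative:

#include <assert.h>
#include <stdint.h>

#define DBM        (UINT64_C(1) << 51)
#define AP_RDONLY  (UINT64_C(1) << 7)

#define writeable_dirty(pte)  (((pte) & (DBM | AP_RDONLY)) == DBM)

int main(void)
{
	assert(!writeable_dirty(DBM | AP_RDONLY));  /* 1. writable_clean */
	assert(writeable_dirty(DBM));               /* 2. writable_dirty */
	assert(!writeable_dirty(AP_RDONLY));        /* 3. read-only */
	return 0;
}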

Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Link: https://lore.kernel.org/r/20240703101604.2576-4-shameerali.kolothum.thodi@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 3d23b924cec1696954c4743e502f6866d795087d..2e57e86163877821e1c32cdf7cebbaa044fcee4e 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -76,6 +76,7 @@
 
 #define ARM_LPAE_PTE_NSTABLE           (((arm_lpae_iopte)1) << 63)
 #define ARM_LPAE_PTE_XN                        (((arm_lpae_iopte)3) << 53)
+#define ARM_LPAE_PTE_DBM               (((arm_lpae_iopte)1) << 51)
 #define ARM_LPAE_PTE_AF                        (((arm_lpae_iopte)1) << 10)
 #define ARM_LPAE_PTE_SH_NS             (((arm_lpae_iopte)0) << 8)
 #define ARM_LPAE_PTE_SH_OS             (((arm_lpae_iopte)2) << 8)
@@ -85,7 +86,7 @@
 
 #define ARM_LPAE_PTE_ATTR_LO_MASK      (((arm_lpae_iopte)0x3ff) << 2)
 /* Ignore the contiguous bit for block splitting */
-#define ARM_LPAE_PTE_ATTR_HI_MASK      (((arm_lpae_iopte)6) << 52)
+#define ARM_LPAE_PTE_ATTR_HI_MASK      (ARM_LPAE_PTE_XN | ARM_LPAE_PTE_DBM)
 #define ARM_LPAE_PTE_ATTR_MASK         (ARM_LPAE_PTE_ATTR_LO_MASK |    \
                                         ARM_LPAE_PTE_ATTR_HI_MASK)
 /* Software bit for solving coherency races */
 
 /* Stage-1 PTE */
 #define ARM_LPAE_PTE_AP_UNPRIV         (((arm_lpae_iopte)1) << 6)
-#define ARM_LPAE_PTE_AP_RDONLY         (((arm_lpae_iopte)2) << 6)
+#define ARM_LPAE_PTE_AP_RDONLY_BIT     7
+#define ARM_LPAE_PTE_AP_RDONLY         (((arm_lpae_iopte)1) << \
+                                          ARM_LPAE_PTE_AP_RDONLY_BIT)
+#define ARM_LPAE_PTE_AP_WR_CLEAN_MASK  (ARM_LPAE_PTE_AP_RDONLY | \
+                                        ARM_LPAE_PTE_DBM)
 #define ARM_LPAE_PTE_ATTRINDX_SHIFT    2
 #define ARM_LPAE_PTE_nG                        (((arm_lpae_iopte)1) << 11)
 
 
 #define iopte_prot(pte)        ((pte) & ARM_LPAE_PTE_ATTR_MASK)
 
+#define iopte_writeable_dirty(pte)                             \
+       (((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM)
+
+#define iopte_set_writeable_clean(ptep)                                \
+       set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)(ptep))
+
 struct arm_lpae_io_pgtable {
        struct io_pgtable       iop;
 
@@ -160,6 +171,13 @@ static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl,
        return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK;
 }
 
+static inline bool iopte_table(arm_lpae_iopte pte, int lvl)
+{
+       if (lvl == (ARM_LPAE_MAX_LEVELS - 1))
+               return false;
+       return iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE;
+}
+
 static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
                                     struct arm_lpae_io_pgtable *data)
 {
@@ -726,6 +744,97 @@ found_translation:
        return iopte_to_paddr(pte, data) | iova;
 }
 
+struct io_pgtable_walk_data {
+       struct iommu_dirty_bitmap       *dirty;
+       unsigned long                   flags;
+       u64                             addr;
+       const u64                       end;
+};
+
+static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
+                                      struct io_pgtable_walk_data *walk_data,
+                                      arm_lpae_iopte *ptep,
+                                      int lvl);
+
+static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data,
+                                 struct io_pgtable_walk_data *walk_data,
+                                 arm_lpae_iopte *ptep, int lvl)
+{
+       struct io_pgtable *iop = &data->iop;
+       arm_lpae_iopte pte = READ_ONCE(*ptep);
+
+       if (iopte_leaf(pte, lvl, iop->fmt)) {
+               size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data);
+
+               if (iopte_writeable_dirty(pte)) {
+                       iommu_dirty_bitmap_record(walk_data->dirty,
+                                                 walk_data->addr, size);
+                       if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR))
+                               iopte_set_writeable_clean(ptep);
+               }
+               walk_data->addr += size;
+               return 0;
+       }
+
+       if (WARN_ON(!iopte_table(pte, lvl)))
+               return -EINVAL;
+
+       ptep = iopte_deref(pte, data);
+       return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1);
+}
+
+static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
+                                      struct io_pgtable_walk_data *walk_data,
+                                      arm_lpae_iopte *ptep,
+                                      int lvl)
+{
+       u32 idx;
+       int max_entries, ret;
+
+       if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
+               return -EINVAL;
+
+       if (lvl == data->start_level)
+               max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
+       else
+               max_entries = ARM_LPAE_PTES_PER_TABLE(data);
+
+       for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data);
+            (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) {
+               ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
+                                        unsigned long iova, size_t size,
+                                        unsigned long flags,
+                                        struct iommu_dirty_bitmap *dirty)
+{
+       struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+       struct io_pgtable_cfg *cfg = &data->iop.cfg;
+       struct io_pgtable_walk_data walk_data = {
+               .dirty = dirty,
+               .flags = flags,
+               .addr = iova,
+               .end = iova + size,
+       };
+       arm_lpae_iopte *ptep = data->pgd;
+       int lvl = data->start_level;
+
+       if (WARN_ON(!size))
+               return -EINVAL;
+       if (WARN_ON((iova + size - 1) & ~(BIT(cfg->ias) - 1)))
+               return -EINVAL;
+       if (data->iop.fmt != ARM_64_LPAE_S1)
+               return -EINVAL;
+
+       return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl);
+}
+
 static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
 {
        unsigned long granule, page_sizes;
@@ -804,6 +913,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
                .map_pages      = arm_lpae_map_pages,
                .unmap_pages    = arm_lpae_unmap_pages,
                .iova_to_phys   = arm_lpae_iova_to_phys,
+               .read_and_clear_dirty = arm_lpae_read_and_clear_dirty,
        };
 
        return data;
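
For context, a rough, hypothetical sketch of how an IOMMU driver's
.read_and_clear_dirty() domain op could forward into the page-table op
added above. The my_domain/to_my_domain() names are invented for
illustration and this fragment does not build in isolation; the real
driver wiring is outside this patch:

static int my_read_and_clear_dirty(struct iommu_domain *domain,
				   unsigned long iova, size_t size,
				   unsigned long flags,
				   struct iommu_dirty_bitmap *dirty)
{
	struct my_domain *md = to_my_domain(domain);   /* hypothetical */
	struct io_pgtable_ops *ops = md->pgtbl_ops;

	/* Page-table format without dirty tracking support. */
	if (!ops || !ops->read_and_clear_dirty)
		return -ENODEV;

	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
}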