A bit more gnttab (gnttab-mremap)
author David Woodhouse <dwmw@amazon.co.uk>
Fri, 6 Jan 2023 18:18:49 +0000 (18:18 +0000)
committer David Woodhouse <dwmw@amazon.co.uk>
Sat, 7 Jan 2023 04:43:49 +0000 (04:43 +0000)
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
hw/i386/kvm/xen_evtchn.h
hw/i386/kvm/xen_gnttab.c
include/sysemu/kvm_xen.h

diff --git a/hw/i386/kvm/xen_evtchn.h b/hw/i386/kvm/xen_evtchn.h
index 1d1ca64e6f83fae1fe0078bcc68f103f8541419f..d797608bd1c424da97329763cff89f8573ebc240 100644
@@ -17,7 +17,6 @@
 #include "hw/sysbus.h"
 
 typedef uint32_t evtchn_port_t;
-#define DOMID_QEMU      0
 
 void xen_evtchn_create(void);
 int xen_evtchn_soft_reset(void);
diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index b52a6bb7ad05cf4d11b0648532a666589983800a..0e96e3760f627dc7ba61b74de9750a62cf14510c 100644
@@ -42,6 +42,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)
 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
 #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
 
+static struct gnttab_backend_ops emu_gnttab_backend_ops;
+
 struct XenGnttabState {
     /*< private >*/
     SysBusDevice busdev;
@@ -61,7 +63,7 @@ struct XenGnttabState {
     MemoryRegion *gnt_aliases;
     uint64_t *gnt_frame_gpas;
 
-    uint32_t *map_track;
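+    /* Per-entry map count; gnt_ref() refuses more than UINT8_MAX mappings. */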
+    uint8_t *map_track;
 };
 
 struct XenGnttabState *xen_gnttab_singleton;
@@ -93,11 +95,15 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp)
         s->gnt_frame_gpas[i] = INVALID_GPA;
     }
 
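+    /*
+     * Pre-populate the reserved XenStore grant entry, pointing it at the
+     * special PFN used for the guest's XenStore ring.
+     */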
+    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
+    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
     qemu_mutex_init(&s->gnt_lock);
 
     xen_gnttab_singleton = s;
 
-    s->map_track = g_new0(uint32_t, s->max_frames * ENTRIES_PER_FRAME_V1);
+    s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);
+
+    xen_gnttab_ops = &emu_gnttab_backend_ops;
 }
 
 static int xen_gnttab_post_load(void *opaque, int version_id)
@@ -244,15 +250,109 @@ int xen_gnttab_query_size_op(struct gnttab_query_size *size)
     return 0;
 }
 
-struct xengntdev_handle {
-    GHashTable *maps;
+/* Track per-open refs, to allow close() to clean up. */
+struct active_ref {
+    union {
+        grant_ref_t gref0;
+        grant_ref_t *grefs;
+    } gref;
+    union {
+        MemoryRegionSection mrs0;
+        MemoryRegionSection *mrss;
+    } mrs;
+    uint32_t nr_refs;
+    int prot;
 };
 
-/* Track per-open refs, to allow close() to clean up. */
-struct instance_map {
-    void *virt_addr;
-    grant_ref_t gref;
-    uint32_t count;
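+/*
+ * Drop one mapping of @ref: mark the page dirty if it was mapped writable,
+ * release the MemoryRegion reference, and clear GTF_reading/GTF_writing
+ * once the last mapping goes away.
+ */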
+static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
+                      MemoryRegionSection *mrs, int prot)
+{
+    if (mrs && mrs->mr) {
+        if (prot & PROT_WRITE) {
+            memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
+                                    XEN_PAGE_SIZE);
+        }
+        memory_region_unref(mrs->mr);
+        mrs->mr = NULL;
+    }
+    assert(s->map_track[ref] != 0);
+
+    if (--s->map_track[ref] == 0) {
+        grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
+        qatomic_and(&gnt_p->flags, ~(GTF_reading | GTF_writing));
+    }
+}
+
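+/*
+ * Take one mapping of @ref: validate the (v1) grant entry, set GTF_reading
+ * (and GTF_writing for writable mappings) and return the host address of
+ * the granted frame, or NULL on failure.
+ */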
+static void *gnt_ref(XenGnttabState *s, grant_ref_t ref,
+                     MemoryRegionSection *mrs, int prot)
+{
+    uint16_t mask = GTF_type_mask | GTF_sub_page;
+    volatile grant_entry_v1_t *gnt_p;
+    grant_entry_v1_t gnt;
+    int retries = 0;
+    uint64_t gpa;
+    void *hva;
+
+    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
+        s->map_track[ref] == UINT8_MAX) {
+        return NULL;
+    }
+
+    if (prot & PROT_WRITE) {
+        mask |= GTF_readonly;
+    }
+
+    gnt_p = &s->entries.v1[ref];
+
+    for (;;) {
+        uint16_t new_flags;
+        gnt = *gnt_p;
+
+        if ((gnt.flags & mask) != GTF_permit_access ||
+            gnt.domid != DOMID_QEMU) {
+            return NULL;
+        }
+
+        new_flags = gnt.flags | GTF_reading;
+        if (prot & PROT_WRITE) {
+            new_flags |= GTF_writing;
+        }
+
+        if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
+            break;
+        }
+
+        /*
+         * The guest can legitimately be changing the GTF_readonly flag. Allow
+         * that, but don't let a malicious guest cause a livelock.
+         */
+        if (retries++ >= 5) {
+            return NULL;
+        }
+    }
+
+    gpa = (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
+    s->map_track[ref]++;
+
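+    /*
+     * Translate the granted frame to host RAM. memory_region_find() takes
+     * a reference on the MemoryRegion, which gnt_unref() drops again.
+     */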
+    *mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);
+    if (!mrs->mr || memory_region_get_ram_addr(mrs->mr) == RAM_ADDR_INVALID ||
+        int128_lt(mrs->size, int128_make64(XEN_PAGE_SIZE))) {
+        gnt_unref(s, ref, mrs, 0);
+        return NULL;
+    }
+
+    hva = qemu_map_ram_ptr(mrs->mr->ram_block,
+                           mrs->offset_within_region);
+    if (!hva) {
+        gnt_unref(s, ref, mrs, 0);
+        return NULL;
+    }
+
+    return hva;
+}
+
+struct xengntdev_handle {
+    GHashTable *active_maps;
 };
 
 static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt, uint32_t nr_grants)
@@ -263,12 +363,140 @@ static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt, uint32_t n
 static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt, uint32_t count,
                                     uint32_t domid, uint32_t *refs, int prot)
 {
-    return NULL;
+    XenGnttabState *s = xen_gnttab_singleton;
+    struct active_ref *act;
+    void *ram_addr, *virtaddr = NULL;
+
+    if (!s) {
+        return NULL;
+    }
+
+    if (domid != xen_domid) {
+        return NULL;
+    }
+
+    if (!count || count > 4096) {
+        return NULL;
+    }
+
+    QEMU_LOCK_GUARD(&s->gnt_lock);
+
+    act = g_new0(struct active_ref, 1);
+    act->nr_refs = count;
+    act->prot = prot;
+
+    if (count == 1) {
+        ram_addr = gnt_ref(s, refs[0], &act->mrs.mrs0, prot);
+        if (!ram_addr) {
+            goto out;
+        }
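+        /*
+         * MREMAP_DONTUNMAP gives the caller its own mapping of the granted
+         * page while leaving the original guest RAM mapping in place
+         * (assumes a kernel where MREMAP_DONTUNMAP is allowed on this
+         * mapping).
+         */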
+        virtaddr = mremap(ram_addr, XEN_PAGE_SIZE, XEN_PAGE_SIZE,
+                          MREMAP_MAYMOVE | MREMAP_DONTUNMAP, NULL);
+        if (virtaddr == MAP_FAILED) {
+            /* Drop the ref taken by gnt_ref() before bailing out. */
+            gnt_unref(s, refs[0], &act->mrs.mrs0, 0);
+            virtaddr = NULL;
+            goto out;
+        }
+        act->gref.gref0 = refs[0];
+    } else {
+        int i;
+
+        /* Need to preallocate enough virtual address space in one hit */
+        virtaddr = mmap(NULL, count * XEN_PAGE_SIZE, prot,
+                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (virtaddr == MAP_FAILED) {
+            virtaddr = NULL;
+            goto out;
+        }
+
+        act->gref.grefs = g_memdup(refs, sizeof(*refs) * count);
+        act->mrs.mrss = g_new0(MemoryRegionSection, count);
+
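+        /*
+         * Move each granted page into its slot in the reserved range;
+         * MREMAP_FIXED replaces the anonymous placeholder mapping there.
+         */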
+        for (i = 0; i < count; i++) {
+            ram_addr = gnt_ref(s, refs[i], &act->mrs.mrss[i], prot);
+            if (!ram_addr || !mremap(ram_addr, XEN_PAGE_SIZE, XEN_PAGE_SIZE,
+                                     MREMAP_MAYMOVE | MREMAP_DONTUNMAP | MREMAP_FIXED,
+                                     virtaddr + (i * XEN_PAGE_SIZE))) {
+                /* unwind */
+                while (i--) {
+                    gnt_unref(s, refs[i], &act->mrs.mrss[i], PROT_READ);
+                }
+                munmap(virtaddr, count * XEN_PAGE_SIZE);
+                virtaddr = NULL;
+                goto out;
+            }
+        }
+    }
+
+ out:
+    if (virtaddr) {
+        g_hash_table_insert(xgt->active_maps, virtaddr, act);
+    } else {
+        if (count > 1) {
+            g_free(act->mrs.mrss);
+            g_free(act->gref.grefs);
+        }
+        g_free(act);
+    }
+
+    return virtaddr;
+}
+
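+/*
+ * Tear down one mapping: called for an explicit unmap, and from close()
+ * via g_hash_table_foreach_remove().
+ */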
+static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
+{
+    XenGnttabState *s = user_data;
+    struct active_ref *act = value;
+
+    munmap(key, act->nr_refs * XEN_PAGE_SIZE);
+
+    if (act->nr_refs == 1) {
+        /* gnt_unref() marks the page dirty and drops the MemoryRegion ref. */
+        gnt_unref(s, act->gref.gref0, &act->mrs.mrs0, act->prot);
+    } else {
+        int i;
+        for (i = 0; i < act->nr_refs; i++) {
+            gnt_unref(s, act->gref.grefs[i], &act->mrs.mrss[i], act->prot);
+        }
+        g_free(act->gref.grefs);
+        g_free(act->mrs.mrss);
+    }
+
+    g_free(act);
+    return true;
 }
 
 static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
                                void *start_address, uint32_t count)
 {
+    XenGnttabState *s = xen_gnttab_singleton;
+    struct active_ref *act;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    QEMU_LOCK_GUARD(&s->gnt_lock);
+
+    act = g_hash_table_lookup(xgt->active_maps, start_address);
+    if (!act) {
+        return -ENOENT;
+    }
+
+    if (count != act->nr_refs) {
+        return -EINVAL;
+    }
+
+    do_unmap(start_address, act, s);
+    g_hash_table_remove(xgt->active_maps, start_address);
+
     return 0;
 }
 
@@ -283,18 +511,25 @@ static struct xengntdev_handle *xen_be_gnttab_open(void)
 {
     struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);
 
-    xgt->maps = g_hash_table_new(g_direct_hash, g_direct_equal);
+    xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
     return xgt;
 }
 
 static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
 {
-    g_hash_table_destroy(xgt->maps);
+    XenGnttabState *s = xen_gnttab_singleton;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
+    g_hash_table_destroy(xgt->active_maps);
     g_free(xgt);
     return 0;
 }
 
-struct gnttab_backend_ops libxengnttab_backend_ops = {
+static struct gnttab_backend_ops emu_gnttab_backend_ops = {
     .open = xen_be_gnttab_open,
     .close = xen_be_gnttab_close,
     .grant_copy = xen_be_gnttab_copy,
diff --git a/include/sysemu/kvm_xen.h b/include/sysemu/kvm_xen.h
index 6e374a06ee42b4a89978ff572e4f66ac37c9ffcd..5c81094cc195c7beb14b01284300043288464f16 100644
@@ -16,6 +16,8 @@
 #define INVALID_GPA UINT64_MAX
 #define INVALID_GFN UINT64_MAX
 
+#define DOMID_QEMU      0
+
 uint32_t kvm_xen_get_caps(void);
 void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id);
 void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type);