From: David Woodhouse
Date: Fri, 6 Jan 2023 18:18:49 +0000 (+0000)
Subject: A bit more gnttab
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=refs%2Fheads%2Fgnttab-mremap;p=users%2Fdwmw2%2Fqemu.git

A bit more gnttab

Implement map_refs/unmap/close for the emulated grant table backend,
remapping granted guest pages into a contiguous virtual address range
with mremap(MREMAP_DONTUNMAP). Each open handle tracks its live
mappings in a hash table so that close() can clean them up, and a
per-ref uint8_t map_track array counts concurrent mappings while the
GTF_reading/GTF_writing flags are held. Also pre-populate the reserved
xenstore grant entry at realize time, and move DOMID_QEMU to kvm_xen.h
so it is visible outside the event channel code.

Signed-off-by: David Woodhouse
---

diff --git a/hw/i386/kvm/xen_evtchn.h b/hw/i386/kvm/xen_evtchn.h
index 1d1ca64e6f..d797608bd1 100644
--- a/hw/i386/kvm/xen_evtchn.h
+++ b/hw/i386/kvm/xen_evtchn.h
@@ -17,7 +17,6 @@
 #include "hw/sysbus.h"
 
 typedef uint32_t evtchn_port_t;
-#define DOMID_QEMU 0
 
 void xen_evtchn_create(void);
 int xen_evtchn_soft_reset(void);
diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index b52a6bb7ad..0e96e3760f 100644
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -42,6 +42,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)
 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
 #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
 
+static struct gnttab_backend_ops emu_gnttab_backend_ops;
+
 struct XenGnttabState {
     /*< private >*/
     SysBusDevice busdev;
@@ -61,7 +63,7 @@ struct XenGnttabState {
     MemoryRegion *gnt_aliases;
     uint64_t *gnt_frame_gpas;
 
-    uint32_t *map_track;
+    uint8_t *map_track;
 };
 
 struct XenGnttabState *xen_gnttab_singleton;
@@ -93,11 +95,15 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp)
         s->gnt_frame_gpas[i] = INVALID_GPA;
     }
 
+    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
+    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
     qemu_mutex_init(&s->gnt_lock);
 
     xen_gnttab_singleton = s;
 
-    s->map_track = g_new0(uint32_t, s->max_frames * ENTRIES_PER_FRAME_V1);
+    s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);
+
+    xen_gnttab_ops = &emu_gnttab_backend_ops;
 }
 
 static int xen_gnttab_post_load(void *opaque, int version_id)
@@ -244,15 +250,109 @@ int xen_gnttab_query_size_op(struct gnttab_query_size *size)
     return 0;
 }
 
-struct xengntdev_handle {
-    GHashTable *maps;
+/* Track per-open refs, to allow close() to clean up. */
+struct active_ref {
+    union {
+        grant_ref_t gref0;
+        grant_ref_t *grefs;
+    } gref;
+    union {
+        MemoryRegionSection mrs0;
+        MemoryRegionSection *mrss;
+    } mrs;
+    uint32_t nr_refs;
+    int prot;
 };
 
-/* Track per-open refs, to allow close() to clean up. */
-struct instance_map {
-    void *virt_addr;
-    grant_ref_t gref;
-    uint32_t count;
+static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
+                      MemoryRegionSection *mrs, int prot)
+{
+    if (mrs && mrs->mr) {
+        if (prot & PROT_WRITE) {
+            memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
+                                    XEN_PAGE_SIZE);
+        }
+        memory_region_unref(mrs->mr);
+        mrs->mr = NULL;
+    }
+    assert(s->map_track[ref] != 0);
+
+    if (--s->map_track[ref] == 0) {
+        grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
+        qatomic_and(&gnt_p->flags, ~(GTF_reading | GTF_writing));
+    }
+}
+
+static void *gnt_ref(XenGnttabState *s, grant_ref_t ref,
+                     MemoryRegionSection *mrs, int prot)
+{
+    uint16_t mask = GTF_type_mask | GTF_sub_page;
+    volatile grant_entry_v1_t *gnt_p;
+    grant_entry_v1_t gnt;
+    int retries = 0;
+    uint64_t gpa;
+    void *hva;
+
+    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
+        s->map_track[ref] == UINT8_MAX) {
+        return NULL;
+    }
+
+    if (prot & PROT_WRITE) {
+        mask |= GTF_readonly;
+    }
+
+    gnt_p = &s->entries.v1[ref];
+
+    for (;;) {
+        uint16_t new_flags;
+        gnt = *gnt_p;
+
+        if ((gnt.flags & mask) != GTF_permit_access ||
+            gnt.domid != DOMID_QEMU) {
+            return NULL;
+        }
+
+        new_flags = gnt.flags | GTF_reading;
+        if (prot & PROT_WRITE) {
+            new_flags |= GTF_writing;
+        }
+
+        if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
+            break;
+        }
+
+        /*
+         * The guest can legitimately be changing the GTF_readonly flag. Allow
+         * that, but don't let a malicious guest cause a livelock.
+         */
+        if (retries++ >= 5) {
+            return NULL;
+        }
+    }
+
+    gpa = gnt.frame << XEN_PAGE_SHIFT;
+    s->map_track[ref]++;
+
+    *mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);
+    if (!mrs->mr || memory_region_get_ram_addr(mrs->mr) == RAM_ADDR_INVALID ||
+        mrs->size < XEN_PAGE_SIZE) {
+        gnt_unref(s, ref, mrs, 0);
+        return NULL;
+    }
+
+    hva = qemu_map_ram_ptr(mrs->mr->ram_block, mrs->offset_within_region);
+    if (!hva) {
+        gnt_unref(s, ref, mrs, 0);
+        return NULL;
+    }
+
+    return hva;
+}
+
+struct xengntdev_handle {
+    GHashTable *active_maps;
 };
 
 static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
                                         uint32_t nr_grants)
@@ -263,12 +363,140 @@ static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt, uint32_t count, uint32_t domid, uint32_t *refs, int prot)
 {
-    return NULL;
+    XenGnttabState *s = xen_gnttab_singleton;
+    struct active_ref *act;
+    void *ram_addr, *virtaddr = NULL;
+
+    if (!s) {
+        return NULL;
+    }
+
+    if (domid != xen_domid) {
+        return NULL;
+    }
+
+    if (!count || count > 4096) {
+        return NULL;
+    }
+
+    QEMU_LOCK_GUARD(&s->gnt_lock);
+
+    act = g_new0(struct active_ref, 1);
+    act->nr_refs = count;
+    act->prot = prot;
+
+    if (count == 1) {
+        ram_addr = gnt_ref(s, refs[0], &act->mrs.mrs0, prot);
+        if (!ram_addr) {
+            goto out;
+        }
+        /* mremap() returns MAP_FAILED, not NULL, on failure */
+        virtaddr = mremap(ram_addr, XEN_PAGE_SIZE, XEN_PAGE_SIZE,
+                          MREMAP_MAYMOVE | MREMAP_DONTUNMAP, NULL);
+        if (virtaddr == MAP_FAILED) {
+            gnt_unref(s, refs[0], &act->mrs.mrs0, 0);
+            virtaddr = NULL;
+            goto out;
+        }
+        act->gref.gref0 = refs[0];
+    } else {
+        int i;
+
+        /* Need to preallocate enough virtual address space in one hit */
+        virtaddr = mmap(NULL, count * XEN_PAGE_SIZE, prot,
+                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (virtaddr == MAP_FAILED) {
+            virtaddr = NULL;
+            goto out;
+        }
+
+        act->gref.grefs = g_memdup(refs, sizeof(*refs) * count);
+        act->mrs.mrss = g_new0(MemoryRegionSection, count);
+
+        for (i = 0; i < count; i++) {
+            void *map = NULL;
+
+            ram_addr = gnt_ref(s, refs[i], &act->mrs.mrss[i], prot);
+            if (ram_addr) {
+                map = mremap(ram_addr, XEN_PAGE_SIZE, XEN_PAGE_SIZE,
+                             MREMAP_MAYMOVE | MREMAP_DONTUNMAP | MREMAP_FIXED,
+                             virtaddr + (i * XEN_PAGE_SIZE));
+                if (map == MAP_FAILED) {
+                    /* This ref was taken but can't be mapped; drop it too */
+                    gnt_unref(s, refs[i], &act->mrs.mrss[i], 0);
+                    map = NULL;
+                }
+            }
+            if (!map) {
+                /* unwind */
+                while (i--) {
+                    gnt_unref(s, refs[i], &act->mrs.mrss[i], PROT_READ);
+                }
+                munmap(virtaddr, count * XEN_PAGE_SIZE);
+                virtaddr = NULL;
+                goto out;
+            }
+        }
+    }
+
+ out:
+    if (virtaddr) {
+        g_hash_table_insert(xgt->active_maps, virtaddr, act);
+    } else {
+        if (count > 1) {
+            g_free(act->mrs.mrss);
+            g_free(act->gref.grefs);
+        }
+        g_free(act);
+    }
+
+    return virtaddr;
+}
+
+static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
+{
+    XenGnttabState *s = user_data;
+    struct active_ref *act = value;
+
+    munmap(key, act->nr_refs * XEN_PAGE_SIZE);
+
+    /*
+     * gnt_unref() already marks the page dirty for PROT_WRITE mappings
+     * and drops the MemoryRegion reference, so don't do it again here.
+     */
+    if (act->nr_refs == 1) {
+        gnt_unref(s, act->gref.gref0, &act->mrs.mrs0, act->prot);
+    } else {
+        int i;
+
+        for (i = 0; i < act->nr_refs; i++) {
+            gnt_unref(s, act->gref.grefs[i], &act->mrs.mrss[i], act->prot);
+        }
+        g_free(act->gref.grefs);
+        g_free(act->mrs.mrss);
+    }
+
+    g_free(act);
+    return true;
 }
 
 static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
                                void *start_address, uint32_t count)
 {
+    XenGnttabState *s = xen_gnttab_singleton;
+    struct active_ref *act;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    QEMU_LOCK_GUARD(&s->gnt_lock);
+
+    act = g_hash_table_lookup(xgt->active_maps, start_address);
+    if (!act) {
+        return -ENOENT;
+    }
+
+    if (count != act->nr_refs) {
+        return -EINVAL;
+    }
+
+    do_unmap(start_address, act, s);
+    g_hash_table_remove(xgt->active_maps, start_address);
+
     return 0;
 }
 
@@ -283,18 +511,25 @@ static struct xengntdev_handle *xen_be_gnttab_open(void)
 {
     struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);
 
-    xgt->maps = g_hash_table_new(g_direct_hash, g_direct_equal);
+    xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
     return xgt;
 }
 
 static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
 {
-    g_hash_table_destroy(xgt->maps);
+    XenGnttabState *s = xen_gnttab_singleton;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    QEMU_LOCK_GUARD(&s->gnt_lock);
+
+    g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
+    g_hash_table_destroy(xgt->active_maps);
     g_free(xgt);
     return 0;
 }
 
-struct gnttab_backend_ops libxengnttab_backend_ops = {
+static struct gnttab_backend_ops emu_gnttab_backend_ops = {
     .open = xen_be_gnttab_open,
     .close = xen_be_gnttab_close,
     .grant_copy = xen_be_gnttab_copy,
diff --git a/include/sysemu/kvm_xen.h b/include/sysemu/kvm_xen.h
index 6e374a06ee..5c81094cc1 100644
--- a/include/sysemu/kvm_xen.h
+++ b/include/sysemu/kvm_xen.h
@@ -16,6 +16,8 @@
 #define INVALID_GPA UINT64_MAX
 #define INVALID_GFN UINT64_MAX
 
+#define DOMID_QEMU 0
+
 uint32_t kvm_xen_get_caps(void);
 void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id);
 void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type);
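
A note on the handshake that gnt_ref()/gnt_unref() implement: they are
the mapping side of the v1 grant protocol. For reference, a minimal
sketch of the guest side, which must publish an entry before the
backend can map it. The field layout is from Xen's public
grant_table.h; wmb() stands in for whatever write barrier the guest
actually uses, and grant_frame() itself is a made-up helper name:

    static void grant_frame(grant_entry_v1_t *tab, grant_ref_t ref,
                            domid_t domid, uint32_t gfn, bool readonly)
    {
        tab[ref].domid = domid;
        tab[ref].frame = gfn;
        wmb(); /* frame/domid must be visible before the entry goes live */
        tab[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
    }

gnt_ref() then sets GTF_reading/GTF_writing with a cmpxchg loop so the
guest can tell the entry is in use, and gnt_unref() clears those bits
again once the last mapping of the ref goes away.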
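The map path leans on mremap(MREMAP_DONTUNMAP), available since Linux
5.7 for private anonymous mappings only: it moves the pages to the new
address and leaves the old range mapped but empty, faulting in fresh
zero pages on the next access. A standalone illustration, outside QEMU:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        size_t sz = sysconf(_SC_PAGESIZE);
        char *old = mmap(NULL, sz, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (old == MAP_FAILED) {
            return 1;
        }
        strcpy(old, "hello");

        /* Move the page to a kernel-chosen address; 'old' stays mapped
         * but now reads back as zeroes. */
        char *moved = mremap(old, sz, sz,
                             MREMAP_MAYMOVE | MREMAP_DONTUNMAP, NULL);
        if (moved == MAP_FAILED) {
            perror("mremap");
            return 1;
        }
        printf("moved: '%s', old first byte: %d\n", moved, old[0]);
        return 0;
    }

This prints "moved: 'hello', old first byte: 0", which is why the
multi-ref path above can gather scattered guest pages into one
contiguous range with MREMAP_FIXED, one page at a time.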
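Finally, a hedged sketch of how a PV backend might consume these ops
once xen_gnttab_realize() has pointed xen_gnttab_ops at them. It
assumes the emu_gnttab_backend_ops initializer, truncated above, also
wires .map_refs and .unmap to the functions shown; demo_map_one() is a
hypothetical caller, not part of this series:

    static void demo_map_one(uint32_t domid, grant_ref_t ref)
    {
        struct xengntdev_handle *h = xen_gnttab_ops->open();
        void *p;

        if (!h) {
            return;
        }
        p = xen_gnttab_ops->map_refs(h, 1, domid, &ref,
                                     PROT_READ | PROT_WRITE);
        if (p) {
            memset(p, 0, XEN_PAGE_SIZE);    /* use the granted page */
            xen_gnttab_ops->unmap(h, p, 1); /* one ref was mapped */
        }
        xen_gnttab_ops->close(h);
    }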