From: Joao Martins
Date: Fri, 28 Sep 2018 17:10:48 +0000 (-0400)
Subject: i386/xen: introduce/release domain to xenstored
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=8bae96604edc6a91231de3390ca5bff6f99b2f08;p=users%2Fdwmw2%2Fqemu.git

i386/xen: introduce/release domain to xenstored

We connect to xenstored and can send the INTRODUCE_DOMAIN message with
the pfn and domid. This will allow xenstored to map the domain ring and
channel to serve xenbus (domain) client requests.

A failure of xs_read() is indicated by a NULL return value, while @len
will report the length of the error string. If the
/tool/xenstored/domid node does not exist in xenstore then we assume
that Domain-0 is where xenstore is hosted.

Try to fetch where the xenstore domain lives such that we can create the
unbound ports and seed the grant table accordingly.

Finally seed xenstore with the default entries. The entries are
populated at init time (before introduce_domain) and are all
domain-local paths:

  <domain-path>/memory/static-max = <value>
  <domain-path>/memory/target = <value>
  <domain-path>/memory/videoram = <value>
  <domain-path>/domid = <domid>
  <domain-path>/cpu/<vcpu-id>/availability = (online|offline)
  <domain-path>/store/ring-ref = <pfn>
  <domain-path>/store/port = <port>

All of which have r/w permission, alongside their parent, the domain
path (/local/domain/<domid>).

When we gracefully shut down or 'quit' via HMP/QMP we properly release
the domain with the release_domain() message. Unattended shutdown is
handled by xenstored itself and the hypervisor propagating the
VIRQ_DOM_EXC interrupt to xenstored.

qemu allocates the SPECIALPAGE_XENSTORE page as a MAP_PRIVATE | MAP_ANON
mapping. The same mapping will be done on xenstored (via
/dev/xen/gntdev) as a MAP_SHARED mapping on the SPECIALPAGE_XENSTORE
grant-entry.

This leads to an interesting breakage of xenstored's rss counter:

  BUG: Bad rss-counter state mm:00000000acd9575d idx:1 val:-1
  BUG: Bad rss-counter state mm:00000000acd9575d idx:3 val:1

Since it seems nonsensical to do a MAP_PRIVATE mapping for a page that
is meant to be shared with a different process, we switch to a
MAP_SHARED | MAP_ANON mapping.

Co-developed-by: Ankur Arora
Signed-off-by: Joao Martins
Signed-off-by: Ankur Arora
Signed-off-by: Boris Ostrovsky
---

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 1625913f84..fe8bf03158 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -834,8 +834,9 @@ void memory_region_init_iommu(void *_iommu_mr,
                               uint64_t size);
 
 /**
- * memory_region_init_ram - Initialize RAM memory region.  Accesses into the
- *                          region will modify memory directly.
+ * memory_region_init_ram, memory_region_init_ram_shared
+ *              Initialize RAM memory region.  Accesses into the
+ *              region will modify memory directly.
  *
  * @mr: the #MemoryRegion to be initialized
  * @owner: the object that tracks the region's reference count (must be
@@ -848,6 +849,7 @@ void memory_region_init_iommu(void *_iommu_mr,
  *        arranges for it to be migrated (by calling vmstate_register_ram()
  *        if @owner is a DeviceState, or vmstate_register_ram_global() if
  *        @owner is NULL).
+ *        The _shared version allocates a shareable mmap-region.
  *
  * TODO: Currently we restrict @owner to being either NULL (for
  * global RAM regions with no owner) or devices, so that we can
@@ -861,6 +863,11 @@ void memory_region_init_ram(MemoryRegion *mr,
                             uint64_t size,
                             Error **errp);
 
+void memory_region_init_ram_shared(MemoryRegion *mr,
+                                   struct Object *owner,
+                                   const char *name,
+                                   uint64_t size,
+                                   Error **errp);
 /**
  * memory_region_init_rom: Initialize a ROM memory region.
  *
diff --git a/memory.c b/memory.c
index 61d66e4441..65ff1ab2ce 100644
--- a/memory.c
+++ b/memory.c
@@ -3119,6 +3119,31 @@ void memory_region_init_ram(MemoryRegion *mr,
     vmstate_register_ram(mr, owner_dev);
 }
 
+void memory_region_init_ram_shared(MemoryRegion *mr,
+                                   struct Object *owner,
+                                   const char *name,
+                                   uint64_t size,
+                                   Error **errp)
+{
+    DeviceState *owner_dev;
+    Error *err = NULL;
+
+    memory_region_init_ram_shared_nomigrate(mr, owner, name, size, true, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+    /*
+     * This will assert if owner is neither NULL nor a DeviceState.
+     * We only want the owner here for the purposes of defining a
+     * unique name for migration. TODO: Ideally we should implement
+     * a naming scheme for Objects which are not DeviceStates, in
+     * which case we can relax this restriction.
+     */
+    owner_dev = DEVICE(owner);
+    vmstate_register_ram(mr, owner_dev);
+}
+
 void memory_region_init_rom(MemoryRegion *mr,
                             struct Object *owner,
                             const char *name,
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index c6bce8c5fe..389d3c9648 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5817,6 +5817,7 @@ static Property x86_cpu_properties[] = {
                        xen_gnttab_max_frames, GNTTAB_MAX_FRAMES),
     DEFINE_PROP_UINT32("xen-gnttab-max-version", X86CPU,
                        xen_gnttab_max_version, 1),
+    DEFINE_PROP_BOOL("xen-xenbus", X86CPU, xen_xenbus, true),
 
     /*
      * From "Requirements for Implementing the Microsoft
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 1c0cb47e7b..aa0bc23636 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1493,6 +1493,7 @@ struct X86CPU {
     uint32_t xen_minor_version;
     uint32_t xen_gnttab_max_frames;
     uint32_t xen_gnttab_max_version;
+    bool xen_xenbus;
 };
 
 static inline X86CPU *x86_env_get_cpu(CPUX86State *env)
diff --git a/target/i386/xen-proto.h b/target/i386/xen-proto.h
index 0965ea7624..a4ab12ab04 100644
--- a/target/i386/xen-proto.h
+++ b/target/i386/xen-proto.h
@@ -55,6 +55,10 @@ typedef struct XenState {
     int port;
     QemuMutex port_lock;
     Notifier exit;
+    struct xs_handle *xenstore;
+    MemoryRegion mr;
+    int xenstore_pfn;
+    int xenstore_port;
 
     struct XenGrantTable gnttab;
 } XenState;
diff --git a/target/i386/xen.c b/target/i386/xen.c
index ce3062b974..6d4a8426e5 100644
--- a/target/i386/xen.c
+++ b/target/i386/xen.c
@@ -12,6 +12,7 @@
 #include "qemu/main-loop.h"
 #include "qemu/log.h"
 #include "qemu/error-report.h"
+#include "qemu/cutils.h"
 #include "linux/kvm.h"
 #include "exec/address-spaces.h"
 #include "cpu.h"
@@ -37,9 +38,21 @@
 #include "standard-headers/xen/event_channel.h"
 #include "standard-headers/xen/grant_table.h"
 
+#include <xenstore.h>
+
 #define PAGE_OFFSET 0xffffffff80000000UL
 #define PAGE_SHIFT 12
 
+/* Import from libxencontrol */
+#define X86_HVM_END_SPECIAL_REGION  0xff000U
+#define X86_HVM_NR_SPECIAL_PAGES    8
+
+#define SPECIALPAGE_XENSTORE 0
+#define SPECIALPAGE_CONSOLE  1
+
+#define xen_special_pfn(x) \
+        (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES + (x))
+
 /*
  * Unhandled hypercalls error:
  *
@@ -51,6 +64,7 @@
 #endif
 
 static QemuMutex xen_global_mutex;
+static int xs_domid;
 
 static void *gpa_to_hva(uint64_t gpa)
 {
@@ -143,6 +157,25 @@ int kvm_xen_set_hypercall_page(CPUState *env)
 
 static void kvm_xen_exit(Notifier *n, void *data)
 {
+    XenState *xen = container_of(n, XenState, exit);
+    struct xs_handle *xsh = xen->xenstore;
+    char *dompath;
+
+    if (!xsh) {
+        return;
+    }
+
+    /* Send the release_domain message before removing the domain's nodes. */
+    xs_release_domain(xsh, xen->domid);
+
+    /* xs_get_domain_path() returns a malloc'ed string that we must free. */
+    dompath = xs_get_domain_path(xsh, xen->domid);
+    if (dompath) {
+        xs_rm(xsh, XBT_NULL, dompath);
+        free(dompath);
+    }
+
+    xs_close(xsh);
 }
 
 void kvm_xen_init(XenState *xen)
@@ -1094,14 +1127,215 @@ int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
     }
 }
 
+/*
+ * Open a connection to xenstored and find out which domain hosts it.
+ *
+ * If /tool/xenstored/domid does not exist, xs_domid keeps its default of 0,
+ * i.e. xenstored is assumed to run in Domain-0.
+ *
+ * Returns 0 on success (xen->xenstore is then valid) or a negative errno;
+ * -EINVAL in particular also covers "xen-xenbus" being disabled.
+ */
+static int kvm_xen_connect_xenstore(CPUState *cs)
+{
+    XenState *xen = cs->xen_state;
+    struct xs_handle *xsh;
+    unsigned int len = 0;
+    char *str;
+    long int res;
+    int ret;
+
+    if (!X86_CPU(cs)->xen_xenbus) {
+        return -EINVAL;
+    }
+
+    xsh = xs_open(XS_OPEN_SOCKETONLY);
+    if (!xsh) {
+        error_report("Cannot connect to xenstore");
+        return -ENOENT;
+    }
+
+    /* xs_read() returns NULL on failure, else a malloc'ed string. */
+    str = xs_read(xsh, XBT_NULL, "/tool/xenstored/domid", &len);
+    if (str) {
+        ret = qemu_strtol(str, NULL, 10, &res);
+        free(str);
+        if (ret < 0) {
+            error_report("Failed to parse xenstored domid");
+            goto err_close;
+        }
+        if (res < 0) {
+            error_report("Invalid xenstored domid");
+            goto err_close;
+        }
+        xs_domid = res;
+    }
+
+    xen->xenstore = xsh;
+    return 0;
+
+err_close:
+    /* Do not leak the connection when the domid node is unusable. */
+    xs_close(xsh);
+    return -EINVAL;
+}
+
+/*
+ * Write the decimal representation of @val to the xenstore node
+ * <@parent>/<@node> within transaction @t.  Allocation failures are
+ * silently skipped; nothing is leaked either way.
+ */
+static void kvm_xen_xs_write_int(struct xs_handle *xsh, xs_transaction_t t,
+                                 const char *parent, const char *node,
+                                 int val)
+{
+    char *path = NULL;
+    char *value = NULL;
+    int len;
+
+    if (asprintf(&path, "%s/%s", parent, node) < 0) {
+        return;
+    }
+
+    len = asprintf(&value, "%d", val);
+    if (len > 0) {
+        xs_write(xsh, t, path, value, len);
+        free(value);
+    }
+    free(path);
+}
+
+/*
+ * Create the domain's home directory in xenstore and seed it with the
+ * domain-local entries needed before the domain is introduced:
+ * domid, store/port and store/ring-ref.
+ */
+static void kvm_xen_seed_xenbus(CPUState *cs)
+{
+    struct xs_permissions frontend_perms[2];
+    XenState *xen = cs->xen_state;
+    struct xs_handle *xsh = xen->xenstore;
+    char *parent = xs_get_domain_path(xsh, xen->domid);
+    xs_transaction_t t;
+
+    if (!parent) {
+        error_report("Failed to get xenstore domain path");
+        return;
+    }
+
+    frontend_perms[0].id = xen->domid;
+    frontend_perms[0].perms = XS_PERM_NONE;
+    frontend_perms[1].id = xs_domid;
+    frontend_perms[1].perms = XS_PERM_READ;
+
+    /* Redo the whole transaction if ending it fails with EAGAIN. */
+    do {
+        t = xs_transaction_start(xsh);
+
+        xs_mkdir(xsh, t, parent);
+        xs_set_permissions(xsh, t, parent, frontend_perms, 2);
+
+        kvm_xen_xs_write_int(xsh, t, parent, "domid", xen->domid);
+        kvm_xen_xs_write_int(xsh, t, parent, "store/port",
+                             xen->xenstore_port);
+        kvm_xen_xs_write_int(xsh, t, parent, "store/ring-ref",
+                             xen->xenstore_pfn);
+    } while (!xs_transaction_end(xsh, t, 0) && errno == EAGAIN);
+
+    free(parent);
+}
+
+/*
+ * Allocate the unbound event channel and the shared ring page used to talk
+ * to xenstored, and grant the xenstore domain access to that page.
+ */
+static void kvm_xen_set_xenbus(CPUState *cs)
+{
+    unsigned long pfn;
+    XenState *xen = cs->xen_state;
+    XenGrantTable *gnttab = &xen->gnttab;
+    struct evtchn_alloc_unbound alloc = {
+        .dom = DOMID_SELF,
+        .remote_dom = xs_domid,
+    };
+
+    if (kvm_xen_evtchn_alloc_unbound(X86_CPU(cs), &alloc)) {
+        error_report("failed to set xenstored port");
+        return;
+    }
+
+    /*
+     * The page is mapped by xenstored as well, hence a shared (rather than
+     * private) anonymous mapping.
+     */
+    pfn = xen_special_pfn(SPECIALPAGE_XENSTORE);
+    memory_region_init_ram_shared(&xen->mr, NULL, "xenbus",
+                                  TARGET_PAGE_SIZE, &error_fatal);
+    memory_region_add_subregion(get_system_memory(), pfn << PAGE_SHIFT,
+                                &xen->mr);
+    memset(memory_region_get_ram_ptr(&xen->mr), 0, TARGET_PAGE_SIZE);
+
+    xen->xenstore_pfn = pfn;
+    xen->xenstore_port = alloc.port;
+
+    gnttab->frames_v1[0][GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
+    gnttab->frames_v1[0][GNTTAB_RESERVED_XENSTORE].domid = xs_domid;
+    gnttab->frames_v1[0][GNTTAB_RESERVED_XENSTORE].frame = xen->xenstore_pfn;
+}
+
+/*
+ * Announce the domain to xenstored, passing the ring pfn and event channel
+ * port.  Returns 0 on success, -EFAULT otherwise.
+ */
+static int kvm_xen_introduce_domain(CPUState *cs)
+{
+    XenState *xen = cs->xen_state;
+
+    if (!xen->xenstore) {
+        return -EFAULT;
+    }
+
+    if (!xs_introduce_domain(xen->xenstore, xen->domid,
+                             xen->xenstore_pfn, xen->xenstore_port)) {
+        return -EFAULT;
+    }
+
+    return 0;
+}
+
 int kvm_xen_vcpu_init(CPUState *cs)
 {
+    /* Only vCPU 0 performs the setup below. */
+    if (cs->cpu_index != 0) {
+        return 0;
+    }
+
     if (!kvm_check_extension(cs->kvm_state, KVM_CAP_XEN_HVM) ||
-        !kvm_check_extension(cs->kvm_state, KVM_CAP_XEN_HVM_GUEST))
+        !kvm_check_extension(cs->kvm_state, KVM_CAP_XEN_HVM_GUEST)) {
         return -ENOTSUP;
+    }
 
     kvm_xen_set_hypercall_page(cs);
-    kvm_xen_set_gnttab(cs);
+
+    if (kvm_xen_set_gnttab(cs) < 0) {
+        error_report("vcpu%d: failed to allocate grant table", cs->cpu_index);
+        return -EFAULT;
+    }
+
+    if (!kvm_xen_connect_xenstore(cs)) {
+        kvm_xen_set_xenbus(cs);
+        kvm_xen_seed_xenbus(cs);
+    } else if (!X86_CPU(cs)->xen_xenbus) {
+        /* xenbus support was disabled on purpose: nothing to introduce. */
+        return 0;
+    } else {
+        return -ENOENT;
+    }
+
+    if (kvm_xen_introduce_domain(cs) < 0) {
+        error_report("vcpu%d: failed to introduce domain", cs->cpu_index);
+        return -EFAULT;
+    }
 
     return 0;
 }