]> www.infradead.org Git - users/dwmw2/vmclock.git/commitdiff
Update to non-RFC v3 posting
authorDavid Woodhouse <dwmw@amazon.co.uk>
Mon, 29 Jul 2024 14:02:45 +0000 (14:02 +0000)
committerDavid Woodhouse <dwmw@amazon.co.uk>
Mon, 29 Jul 2024 14:04:19 +0000 (14:04 +0000)
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
ptp_vmclock.c
vmclock-abi.h

index adf6ef31d1a426b669feade0b34dbf063e05f6bb..58a6ba2c7b05ab15f83cfa2c9bd7e6f4cc22fa5d 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/version.h>
 
 #include "vmclock-abi.h"
 
@@ -27,7 +28,7 @@
 #include <asm/kvmclock.h>
 #endif
 
-#ifdef CONFIG_KVM_GUESTx
+#if defined(CONFIG_KVM_GUEST) && LINUX_VERSION_CODE >= KERNEL_VERSION(6,9,0)
 #define SUPPORT_KVMCLOCK
 #endif
 
@@ -48,6 +49,13 @@ struct vmclock_state {
 
 #define VMCLOCK_MAX_WAIT ms_to_ktime(100)
 
+/* Require at least the flags field to be present. All else can be optional. */
+#define VMCLOCK_MIN_SIZE offsetof(struct vmclock_abi, pad)
+
+#define VMCLOCK_FIELD_PRESENT(_c, _f)                    \
+       (_c)->size >= (offsetof(struct vmclock_abi, _f) + \
+                      sizeof((_c)->_f))
+
 /*
  * Multiply a 64-bit count by a 64-bit tick 'period' in units of seconds >> 64
  * and add the fractional second part of the reference time.
@@ -58,9 +66,9 @@ struct vmclock_state {
  * If __int128 isn't available, perform the calculation 32 bits at a time to
  * avoid overflow.
  */
-static inline uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta,
-                                              uint64_t period, uint8_t shift,
-                                              uint64_t frac_sec)
+static uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta,
+                                       uint64_t period, uint8_t shift,
+                                       uint64_t frac_sec)
 {
        unsigned __int128 res = (unsigned __int128)delta * period;
 
@@ -70,7 +78,7 @@ static inline uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta,
        return (uint64_t)res;
 }
 
-static inline bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec)
+static bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec)
 {
        if (likely(clk->time_type == VMCLOCK_TIME_UTC))
                return true;
@@ -104,6 +112,11 @@ static int vmclock_get_crosststamp(struct vmclock_state *st,
 
        while (1) {
                seq = st->clk->seq_count & ~1ULL;
+
+               /*
+                * This pairs with a write barrier in the hypervisor
+                * which populates this structure.
+                */
                virt_rmb();
 
                if (st->clk->clock_status == VMCLOCK_STATUS_UNRELIABLE)
@@ -131,8 +144,9 @@ static int vmclock_get_crosststamp(struct vmclock_state *st,
                                cycle = get_cycles();
                                ptp_read_system_postts(sts);
                        }
-               } else
+               } else {
                        cycle = get_cycles();
+               }
 
                delta = cycle - st->clk->counter_value;
 
@@ -156,7 +170,9 @@ static int vmclock_get_crosststamp(struct vmclock_state *st,
 
        if (system_counter) {
                system_counter->cycles = cycle;
-               //system_counter->cs_id = st->cs_id;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,9,0)
+               system_counter->cs_id = st->cs_id;
+#endif
        }
 
        if (sts) {
@@ -235,7 +251,6 @@ static int ptp_vmclock_get_time_fn(ktime_t *device_time,
        return ret;
 }
 
-
 static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp,
                                      struct system_device_crosststamp *xtstamp)
 {
@@ -313,6 +328,37 @@ static const struct ptp_clock_info ptp_vmclock_info = {
        .getcrosststamp = ptp_vmclock_getcrosststamp,
 };
 
+static struct ptp_clock *vmclock_ptp_register(struct device *dev,
+                                             struct vmclock_state *st)
+{
+       enum clocksource_ids cs_id;
+
+       if (IS_ENABLED(CONFIG_ARM64) &&
+           st->clk->counter_id == VMCLOCK_COUNTER_ARM_VCNT) {
+               /* Can we check it's the virtual counter? */
+               cs_id = CSID_ARM_ARCH_COUNTER;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,9,0)
+       } else if (IS_ENABLED(CONFIG_X86) &&
+                  st->clk->counter_id == VMCLOCK_COUNTER_X86_TSC) {
+               cs_id = CSID_X86_TSC;
+#endif
+       } else {
+               return NULL;
+       }
+
+       /* Only UTC, or TAI with offset */
+       if (!tai_adjust(st->clk, NULL)) {
+               dev_info(dev, "vmclock does not provide unambiguous UTC\n");
+               return NULL;
+       }
+
+       st->sys_cs_id = st->cs_id = cs_id;
+       st->ptp_clock_info = ptp_vmclock_info;
+       strscpy(st->ptp_clock_info.name, st->name, sizeof(st->ptp_clock_info.name));
+
+       return ptp_clock_register(&st->ptp_clock_info, dev);
+}
+
 static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma)
 {
        struct vmclock_state *st = container_of(fp->private_data,
@@ -420,12 +466,20 @@ static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st)
        struct acpi_device *adev = ACPI_COMPANION(dev);
        acpi_status status;
 
-        status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS,
-                                     vmclock_acpi_resources, st);
-        if (ACPI_FAILURE(status) || resource_type(&st->res) != IORESOURCE_MEM) {
-                dev_err(dev, "failed to get resources\n");
-                return -ENODEV;
-        }
+       /*
+        * This should never happen as this function is only called when
+        * has_acpi_companion(dev) is true, but the logic is sufficiently
+        * complex that Coverity can't see the tautology.
+        */
+       if (!adev)
+               return -ENODEV;
+
+       status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS,
+                                    vmclock_acpi_resources, st);
+       if (ACPI_FAILURE(status) || resource_type(&st->res) != IORESOURCE_MEM) {
+               dev_err(dev, "failed to get resources\n");
+               return -ENODEV;
+       }
 
        return 0;
 }
@@ -457,6 +511,12 @@ static int vmclock_probe(struct platform_device *pdev)
                goto out;
        }
 
+       if (resource_size(&st->res) < VMCLOCK_MIN_SIZE) {
+               dev_info(dev, "Region too small (0x%llx)\n",
+                        resource_size(&st->res));
+               ret = -EINVAL;
+               goto out;
+       }
        st->clk = devm_memremap(dev, st->res.start, resource_size(&st->res),
                                MEMREMAP_WB | MEMREMAP_DEC);
        if (IS_ERR(st->clk)) {
@@ -467,7 +527,7 @@ static int vmclock_probe(struct platform_device *pdev)
        }
 
        if (st->clk->magic != VMCLOCK_MAGIC ||
-           st->clk->size < sizeof(*st->clk) ||
+           st->clk->size > resource_size(&st->res) ||
            st->clk->version != 1) {
                dev_info(dev, "vmclock magic fields invalid\n");
                ret = -EINVAL;
@@ -479,7 +539,7 @@ static int vmclock_probe(struct platform_device *pdev)
                goto out;
 
        st->index = ret;
-        ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st);
+       ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st);
        if (ret)
                goto out;
 
@@ -489,7 +549,12 @@ static int vmclock_probe(struct platform_device *pdev)
                goto out;
        }
 
-       /* If the structure is big enough, it can be mapped to userspace */
+       /*
+        * If the structure is big enough, it can be mapped to userspace.
+        * Theoretically a guest OS even using larger pages could still
+        * use 4KiB PTEs to map smaller MMIO regions like this, but let's
+        * cross that bridge if/when we come to it.
+        */
        if (st->clk->size >= PAGE_SIZE) {
                st->miscdev.minor = MISC_DYNAMIC_MINOR;
                st->miscdev.fops = &vmclock_miscdev_fops;
@@ -501,37 +566,18 @@ static int vmclock_probe(struct platform_device *pdev)
        }
 
        /* If there is valid clock information, register a PTP clock */
-       if (IS_ENABLED(CONFIG_ARM64) &&
-           st->clk->counter_id == VMCLOCK_COUNTER_ARM_VCNT) {
-               /* Can we check it's the virtual counter? */
-               st->cs_id = CSID_ARM_ARCH_COUNTER;
-#if 0
-       } else if (IS_ENABLED(CONFIG_X86) &&
-                  st->clk->counter_id == VMCLOCK_COUNTER_X86_TSC) {
-               st->cs_id = CSID_X86_TSC;
-#endif
-       }
-
-       /* Only UTC, or TAI with offset */
-       if (!tai_adjust(st->clk, NULL)) {
-               dev_info(dev, "vmclock does not provide unambiguous UTC\n");
-               st->cs_id = CSID_GENERIC;
-       }
-
-       if (st->cs_id) {
-               st->sys_cs_id = st->cs_id;
-
-               st->ptp_clock_info = ptp_vmclock_info;
-               strscpy(st->ptp_clock_info.name, st->name, sizeof(st->ptp_clock_info.name));
-               st->ptp_clock = ptp_clock_register(&st->ptp_clock_info, dev);
-
+       if (VMCLOCK_FIELD_PRESENT(st->clk, time_frac_sec)) {
+               /* Can return a silent NULL, or an error. */
+               st->ptp_clock = vmclock_ptp_register(dev, st);
                if (IS_ERR(st->ptp_clock)) {
                        ret = PTR_ERR(st->ptp_clock);
                        st->ptp_clock = NULL;
                        vmclock_remove(pdev);
                        goto out;
                }
-       } else if (!st->miscdev.minor) {
+       }
+
+       if (!st->miscdev.minor && !st->ptp_clock) {
                /* Neither miscdev nor PTP registered */
                dev_info(dev, "vmclock: Neither miscdev nor PTP available; not registering\n");
                ret = -ENODEV;
@@ -550,6 +596,7 @@ static int vmclock_probe(struct platform_device *pdev)
 }
 
 static const struct acpi_device_id vmclock_acpi_ids[] = {
+       { "AMZNC10C", 0 },
        { "VMCLOCK", 0 },
        {}
 };
index 3bde10ddec38d9159ffee2a851896ed2f0917f39..19cbf85efde68fa86331d3b4279c1abfd2773b68 100644 (file)
@@ -144,10 +144,9 @@ struct vmclock_abi {
        int16_t tai_offset_sec;
        uint8_t leap_indicator;
        /*
-        * This field is based on the the VIRTIO_RTC_LEAP_xxx values as
-        * defined in the current draft of virtio-rtc, but since smearing
-        * cannot be used with the shared memory device, some values are
-        * not used.
+        * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined
+        * in the current draft of virtio-rtc, but since smearing cannot be
+        * used with the shared memory device, some values are not used.
         *
         * The _POST_POS and _POST_NEG values allow the guest to perform
         * its own smearing during the day or so after a leap second when
@@ -168,8 +167,8 @@ struct vmclock_abi {
         */
        uint64_t counter_value;
        /*
-        * Counter frequency, and error margin. The unit of these fields is
-        * seconds >> (64 + counter_period_shift)
+        * Counter period, and error margin of same. The unit of these
+        * fields is 1/2^(64 + counter_period_shift) of a second.
         */
        uint64_t counter_period_frac_sec;
        uint64_t counter_period_esterror_rate_frac_sec;
@@ -179,7 +178,7 @@ struct vmclock_abi {
         * Time according to time_type field above.
         */
        uint64_t time_sec;              /* Seconds since time_type epoch */
-       uint64_t time_frac_sec;         /* (seconds >> 64) */
+       uint64_t time_frac_sec;         /* Units of 1/2^64 of a second */
        uint64_t time_esterror_nanosec;
        uint64_t time_maxerror_nanosec;
 };