#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
+#include <linux/version.h>
#include "vmclock-abi.h"
#include <asm/kvmclock.h>
#endif
-#ifdef CONFIG_KVM_GUESTx
+#if defined(CONFIG_KVM_GUEST) && LINUX_VERSION_CODE >= KERNEL_VERSION(6,9,0)
#define SUPPORT_KVMCLOCK
#endif
#define VMCLOCK_MAX_WAIT ms_to_ktime(100)
+/* Require at least the flags field to be present. All else can be optional. */
+#define VMCLOCK_MIN_SIZE offsetof(struct vmclock_abi, pad)
+
+#define VMCLOCK_FIELD_PRESENT(_c, _f) \
+ (_c)->size >= (offsetof(struct vmclock_abi, _f) + \
+ sizeof((_c)->_f))
+
/*
* Multiply a 64-bit count by a 64-bit tick 'period' in units of seconds >> 64
* and add the fractional second part of the reference time.
* If __int128 isn't available, perform the calculation 32 bits at a time to
* avoid overflow.
*/
-static inline uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta,
- uint64_t period, uint8_t shift,
- uint64_t frac_sec)
+static uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta,
+ uint64_t period, uint8_t shift,
+ uint64_t frac_sec)
{
unsigned __int128 res = (unsigned __int128)delta * period;
return (uint64_t)res;
}
-static inline bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec)
+static bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec)
{
if (likely(clk->time_type == VMCLOCK_TIME_UTC))
return true;
while (1) {
seq = st->clk->seq_count & ~1ULL;
+
+ /*
+ * This pairs with a write barrier in the hypervisor
+ * which populates this structure.
+ */
virt_rmb();
if (st->clk->clock_status == VMCLOCK_STATUS_UNRELIABLE)
cycle = get_cycles();
ptp_read_system_postts(sts);
}
- } else
+ } else {
cycle = get_cycles();
+ }
delta = cycle - st->clk->counter_value;
if (system_counter) {
system_counter->cycles = cycle;
- //system_counter->cs_id = st->cs_id;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,9,0)
+ system_counter->cs_id = st->cs_id;
+#endif
}
if (sts) {
return ret;
}
-
static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp,
struct system_device_crosststamp *xtstamp)
{
.getcrosststamp = ptp_vmclock_getcrosststamp,
};
+static struct ptp_clock *vmclock_ptp_register(struct device *dev,
+ struct vmclock_state *st)
+{
+ enum clocksource_ids cs_id;
+
+ if (IS_ENABLED(CONFIG_ARM64) &&
+ st->clk->counter_id == VMCLOCK_COUNTER_ARM_VCNT) {
+ /* Can we check it's the virtual counter? */
+ cs_id = CSID_ARM_ARCH_COUNTER;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,9,0)
+ } else if (IS_ENABLED(CONFIG_X86) &&
+ st->clk->counter_id == VMCLOCK_COUNTER_X86_TSC) {
+ cs_id = CSID_X86_TSC;
+#endif
+ } else {
+ return NULL;
+ }
+
+ /* Only UTC, or TAI with offset */
+ if (!tai_adjust(st->clk, NULL)) {
+ dev_info(dev, "vmclock does not provide unambiguous UTC\n");
+ return NULL;
+ }
+
+ st->sys_cs_id = st->cs_id = cs_id;
+ st->ptp_clock_info = ptp_vmclock_info;
+ strscpy(st->ptp_clock_info.name, st->name, sizeof(st->ptp_clock_info.name));
+
+ return ptp_clock_register(&st->ptp_clock_info, dev);
+}
+
static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma)
{
struct vmclock_state *st = container_of(fp->private_data,
struct acpi_device *adev = ACPI_COMPANION(dev);
acpi_status status;
- status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS,
- vmclock_acpi_resources, st);
- if (ACPI_FAILURE(status) || resource_type(&st->res) != IORESOURCE_MEM) {
- dev_err(dev, "failed to get resources\n");
- return -ENODEV;
- }
+ /*
+ * This should never happen as this function is only called when
+ * has_acpi_companion(dev) is true, but the logic is sufficiently
+ * complex that Coverity can't see the tautology.
+ */
+ if (!adev)
+ return -ENODEV;
+
+ status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS,
+ vmclock_acpi_resources, st);
+ if (ACPI_FAILURE(status) || resource_type(&st->res) != IORESOURCE_MEM) {
+ dev_err(dev, "failed to get resources\n");
+ return -ENODEV;
+ }
return 0;
}
goto out;
}
+ if (resource_size(&st->res) < VMCLOCK_MIN_SIZE) {
+ dev_info(dev, "Region too small (0x%llx)\n",
+ resource_size(&st->res));
+ ret = -EINVAL;
+ goto out;
+ }
st->clk = devm_memremap(dev, st->res.start, resource_size(&st->res),
MEMREMAP_WB | MEMREMAP_DEC);
if (IS_ERR(st->clk)) {
}
if (st->clk->magic != VMCLOCK_MAGIC ||
- st->clk->size < sizeof(*st->clk) ||
+ st->clk->size > resource_size(&st->res) ||
st->clk->version != 1) {
dev_info(dev, "vmclock magic fields invalid\n");
ret = -EINVAL;
goto out;
st->index = ret;
- ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st);
+ ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st);
if (ret)
goto out;
goto out;
}
- /* If the structure is big enough, it can be mapped to userspace */
+ /*
+ * If the structure is big enough, it can be mapped to userspace.
+ * Theoretically a guest OS even using larger pages could still
+ * use 4KiB PTEs to map smaller MMIO regions like this, but let's
+ * cross that bridge if/when we come to it.
+ */
if (st->clk->size >= PAGE_SIZE) {
st->miscdev.minor = MISC_DYNAMIC_MINOR;
st->miscdev.fops = &vmclock_miscdev_fops;
}
/* If there is valid clock information, register a PTP clock */
- if (IS_ENABLED(CONFIG_ARM64) &&
- st->clk->counter_id == VMCLOCK_COUNTER_ARM_VCNT) {
- /* Can we check it's the virtual counter? */
- st->cs_id = CSID_ARM_ARCH_COUNTER;
-#if 0
- } else if (IS_ENABLED(CONFIG_X86) &&
- st->clk->counter_id == VMCLOCK_COUNTER_X86_TSC) {
- st->cs_id = CSID_X86_TSC;
-#endif
- }
-
- /* Only UTC, or TAI with offset */
- if (!tai_adjust(st->clk, NULL)) {
- dev_info(dev, "vmclock does not provide unambiguous UTC\n");
- st->cs_id = CSID_GENERIC;
- }
-
- if (st->cs_id) {
- st->sys_cs_id = st->cs_id;
-
- st->ptp_clock_info = ptp_vmclock_info;
- strscpy(st->ptp_clock_info.name, st->name, sizeof(st->ptp_clock_info.name));
- st->ptp_clock = ptp_clock_register(&st->ptp_clock_info, dev);
-
+ if (VMCLOCK_FIELD_PRESENT(st->clk, time_frac_sec)) {
+ /* Can return a silent NULL, or an error. */
+ st->ptp_clock = vmclock_ptp_register(dev, st);
if (IS_ERR(st->ptp_clock)) {
ret = PTR_ERR(st->ptp_clock);
st->ptp_clock = NULL;
vmclock_remove(pdev);
goto out;
}
- } else if (!st->miscdev.minor) {
+ }
+
+ if (!st->miscdev.minor && !st->ptp_clock) {
/* Neither miscdev nor PTP registered */
dev_info(dev, "vmclock: Neither miscdev nor PTP available; not registering\n");
ret = -ENODEV;
}
static const struct acpi_device_id vmclock_acpi_ids[] = {
+ { "AMZNC10C", 0 },
{ "VMCLOCK", 0 },
{}
};