From 1e383b4bfea84c4796dd98b20edd52e7575b13d6 Mon Sep 17 00:00:00 2001 From: Rob Gardner Date: Tue, 25 Apr 2017 12:04:00 -0600 Subject: [PATCH] sparc64: Detect DAX ra+pgsz when hvapi minor doesn't indicate it Orabug: 25911008 The RA+PGSZ HV API feature is detected via a controlled experiment. The experiment constructs a small DAX request and places the output buffer at the very end of an 8k page. Then it checks to see if the 8k page boundary was honored, and if so, then we've got the ability to pass the page size along with a real address. Once the HV API minor number is bumped to 1, the minor number will be the primary method of detection, and this experiment is only run as a fallback while the minor number is still 0. Signed-off-by: Rob Gardner Reviewed-by: Jonathan Helman (cherry picked from commit 013d5b9909e804817dcd939f50f242f09237feac) Signed-off-by: Allen Pais --- arch/sparc/dax/dax_impl.h | 9 ++-- arch/sparc/dax/dax_main.c | 2 +- arch/sparc/dax/dax_misc.c | 107 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 5 deletions(-) diff --git a/arch/sparc/dax/dax_impl.h b/arch/sparc/dax/dax_impl.h index fb310fd6d1c4..2f5931da5409 100644 --- a/arch/sparc/dax/dax_impl.h +++ b/arch/sparc/dax/dax_impl.h @@ -155,12 +155,12 @@ extern const struct vm_operations_struct dax_vm_ops; #define DAX_PERF_CTR_OFFSET(num, node, dax) \ DAX_PERF_REG_OFF(num, DAX_PERF_CTR_0, (node), (dax)) -/* dax flow control test constants */ +/* dax flow control and ra/pgsz test constants */ #define DAX_FLOW_LIMIT 64UL -#define DAX_INPUT_ELEMS 64 +#define DAX_INPUT_ELEMS 128 #define DAX_INPUT_ELEM_SZ 1 -#define DAX_OUTPUT_ELEMS 64 -#define DAX_OUTPUT_ELEM_SZ 2 +#define DAX_OUTPUT_ELEMS 128 +#define DAX_OUTPUT_ELEM_SZ 1 enum dax_types { DAX1, @@ -263,6 +263,7 @@ void dax_unlock_pages_ccb(struct dax_ctx *ctx, int ccb_num, union ccb *ccbp, bool warn); void dax_prt_ccbs(union ccb *ccb, u64 len); bool dax_has_flow_ctl_numa(void); +bool dax_has_ra_pgsz(void); long dax_perfcount_ioctl(struct file *f, unsigned int cmd, unsigned long arg); union ccb 
*dax_ccb_buffer_reserve(struct dax_ctx *ctx, size_t len, size_t *reserved); diff --git a/arch/sparc/dax/dax_main.c b/arch/sparc/dax/dax_main.c index 3fe8ca88190d..d6dc550165a9 100644 --- a/arch/sparc/dax/dax_main.c +++ b/arch/sparc/dax/dax_main.c @@ -141,7 +141,7 @@ static int __init dax_attach(void) DAX_MAJOR, minor); } - dax_no_ra_pgsz = (DAX_MAJOR == 1) && (minor == 0); + dax_no_ra_pgsz = (DAX_MAJOR == 1) && (minor == 0) && !dax_has_ra_pgsz(); /* && short-circuits: the probe only runs when DAX_MAJOR is 1 and minor is 0 */ dax_dbg("RA pagesize feature %spresent", dax_no_ra_pgsz ? "not " : ""); ret = hv_get_hwqueue_size(&max_ccbs); diff --git a/arch/sparc/dax/dax_misc.c b/arch/sparc/dax/dax_misc.c index 443b30183976..1928df9b423c 100644 --- a/arch/sparc/dax/dax_misc.c +++ b/arch/sparc/dax/dax_misc.c @@ -159,6 +159,113 @@ bool dax_has_flow_ctl_numa(void) return !!atomic_read(&has_flow_ctl); } +bool dax_has_ra_pgsz(void) +{ /* Probe for the RA+page-size feature: submit one extract CCB whose real-address output buffer starts 16 bytes before a page end, then report true only if the completion area shows CCB_CMD_STAT_FAILED with err_mask CCB_CMD_ERR_POF. Returns false on allocation failure, submit failure, timeout, or any other completion status. */ + struct ccb_extract *ccb; + struct ccb_completion_area *ca; + char *mem, *dax_input, *dax_output; + unsigned long submitted_ccb_buf_len, nomap_va, hv_rv, ra, va; + long timeout; + bool ret = false; + int i; + + /* allocate 3 pages so we are guaranteed a 16k aligned chunk inside it; that chunk holds the ccb, its completion area, the input bytes and the output bytes */ + mem = kzalloc(3*PAGE_SIZE, GFP_KERNEL); + + if (mem == NULL) + return false; + + va = ALIGN((unsigned long)mem, 2*PAGE_SIZE); /* first 2*PAGE_SIZE-aligned address at or after mem */ + ccb = (struct ccb_extract *) va; + ca = (struct ccb_completion_area *)ALIGN(va + sizeof(*ccb), + sizeof(*ca)); /* completion area follows the ccb, aligned to its own size */ + dax_input = (char *)ca + sizeof(*ca); /* input bytes start right after the completion area */ + /* position output address 16 bytes before the end of the page; the DAX_OUTPUT_ELEMS * DAX_OUTPUT_ELEM_SZ bytes filled by the memset below therefore extend past that page boundary */ + dax_output = (char *) ALIGN((u64)dax_input, PAGE_SIZE) - 16; + + ccb->control.hdr.opcode = CCB_QUERY_OPCODE_EXTRACT; + + /* I/O formats and sizes */ + ccb->control.src0_fmt = CCB_QUERY_IFMT_FIX_BYTE; + ccb->control.src0_sz = DAX_INPUT_ELEM_SZ - 1; /* 1 byte */ + ccb->control.output_sz = DAX_OUTPUT_ELEM_SZ - 1; /* 1 byte */ + ccb->control.output_fmt = CCB_QUERY_OFMT_BYTE_ALIGN; + + /* addresses: source and completion are stored as kernel virtual addresses, the output as the address returned by virt_to_phys() */ + *(u64 *)&ccb->src0 = (u64) dax_input; + *(u64 *)&ccb->output = (u64) virt_to_phys(dax_output); + *(u64 
*)&ccb->completion = (u64) ca; + + /* address types: VA for source and completion, RA for the output */ + ccb->control.hdr.at_src0 = CCB_AT_VA; + ccb->control.hdr.at_dst = CCB_AT_RA; + ccb->control.hdr.at_cmpl = CCB_AT_VA; + + /* input sizes (the count is stored minus one, apparently the same encoding as the element sizes above - TODO confirm) */ + ccb->data_acc_ctl.input_len_fmt = CCB_QUERY_ILF_BYTE; + ccb->data_acc_ctl.input_cnt = (DAX_INPUT_ELEMS * DAX_INPUT_ELEM_SZ) - 1; + + /* no flow control, we are testing for page limit */ + ccb->data_acc_ctl.flow_ctl = 0; + + memset(dax_input, 0x99, DAX_INPUT_ELEMS * DAX_INPUT_ELEM_SZ); + memset(dax_output, 0x77, DAX_OUTPUT_ELEMS * DAX_OUTPUT_ELEM_SZ); /* presumably a marker pattern so the dump at the end shows which output bytes were overwritten */ + + ra = virt_to_phys(ccb); /* the ccb itself is passed to the submit call by this address */ + + hv_rv = sun4v_dax_ccb_submit((void *) ra, 64, HV_DAX_CCB_VA_PRIVILEGED | HV_DAX_QUERY_CMD, 0, + &submitted_ccb_buf_len, &nomap_va); /* 64 is presumably the ccb length in bytes (the same value is passed as len to dax_prt_ccbs) - TODO confirm */ + if (hv_rv != HV_EOK) { + dax_info("failed dax submit, ret=0x%lx", hv_rv); + if (dax_debug & DAX_DBG_FLG_BASIC) + dax_prt_ccbs((union ccb *)ccb, 64); + goto done; + } + + timeout = 10LL * 1000LL * 1000LL; /* 10ms in ns; NOTE(review): each pass below subtracts mwait_time (8192) regardless of how long the wait really took, so this is a budget of passes rather than a measured time */ + while (timeout > 0) { + unsigned long status; + unsigned long mwait_time = 8192; + + /* monitored load */ + __asm__ __volatile__("lduba [%1] 0x84, %0\n\t" + : "=r" (status) : "r" (&ca->cmd_status)); + if (status == CCB_CMD_STAT_NOT_COMPLETED) + __asm__ __volatile__("wr %0, %%asr28\n\t" /* mwait */ + : : "r" (mwait_time)); + else + break; + timeout = timeout - mwait_time; + } + if (timeout <= 0) { + dax_alert("dax ra_pgsz test timed out"); + goto done; + } + + if (ca->cmd_status == CCB_CMD_STAT_FAILED && + ca->err_mask == CCB_CMD_ERR_POF) { /* a FAILED completion with the POF error (presumably page overflow, per the debug message below) is the outcome that indicates the feature is present */ + ret = true; + dax_dbg("dax ra_pgsz test succeeded: feature is available"); + } + else { + dax_dbg("dax ra_pgsz test failed: feature not available"); + } + + dax_dbg("page overflow test, output_sz=%d", ca->output_sz); + dax_dbg("mem=%p, va=0x%lx, ccb=%p, ca=%p, out=%p", + mem, va, ccb, ca, dax_output); + dax_dbg("cmd_status=%d, err_mask=0x%x", + ca->cmd_status, ca->err_mask); + dax_prt_ccbs((union ccb *)ccb, 64); /* NOTE(review): not gated on DAX_DBG_FLG_BASIC, unlike the submit-failure path above */ + for (i=0; i<64; i=i+8) { /* dump the first 64 bytes at dax_output, one unsigned long per pass */ + dax_dbg("%08lx/ %08lx", (unsigned long) dax_output+i, *(unsigned 
long *)(dax_output+i)); + } + +done: + kfree(mem); /* NOTE(review): also reached after a timeout - confirm the submitted CCB can no longer write to mem by then */ + return ret; +} + void dax_overflow_check(struct dax_ctx *ctx, int idx) { unsigned long virtp, page_size = PAGE_SIZE; -- 2.50.1