www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
nd_blk: change aperture mapping from WC to WB
author Ross Zwisler <ross.zwisler@linux.intel.com>
Thu, 27 Aug 2015 19:14:20 +0000 (13:14 -0600)
committer Dan Duval <dan.duval@oracle.com>
Wed, 7 Dec 2016 17:19:27 +0000 (12:19 -0500)
Orabug: 22913653

This should result in a pretty sizeable performance gain for reads.  For
rough comparison I did some simple read testing using PMEM to compare
reads of write combining (WC) mappings vs write-back (WB).  This was
done on a random lab machine.

PMEM reads from a write combining mapping:
# dd of=/dev/null if=/dev/pmem0 bs=4096 count=100000
100000+0 records in
100000+0 records out
409600000 bytes (410 MB) copied, 9.2855 s, 44.1 MB/s

PMEM reads from a write-back mapping:
# dd of=/dev/null if=/dev/pmem0 bs=4096 count=1000000
1000000+0 records in
1000000+0 records out
4096000000 bytes (4.1 GB) copied, 3.44034 s, 1.2 GB/s

To be able to safely support a write-back aperture I needed to add
support for the "read flush" _DSM flag, as outlined in the DSM spec:

http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf

This flag tells the ND BLK driver that it needs to flush the cache lines
associated with the aperture after the aperture is moved but before any
new data is read.  This ensures that any stale cache lines from the
previous contents of the aperture will be discarded from the processor
cache, and the new data will be read properly from the DIMM.  We know
that the cache lines are clean and will be discarded without any
writeback because either a) the previous aperture operation was a read,
and we never modified the contents of the aperture, or b) the previous
aperture operation was a write and we must have written back the dirtied
contents of the aperture to the DIMM before the I/O was completed.

In order to add support for the "read flush" flag I needed to add a
generic routine to invalidate cache lines, mmio_flush_range().  This is
protected by the ARCH_HAS_MMIO_FLUSH Kconfig variable, and is currently
only supported on x86.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dan Duval <dan.duval@oracle.com>
(cherry picked from commit 67a3e8fe90156d41cd480d3dfbb40f3bc007c262)

Conflicts:

arch/x86/Kconfig
drivers/acpi/nfit.c

arch/x86/Kconfig
arch/x86/include/asm/cacheflush.h
arch/x86/include/asm/io.h
arch/x86/include/asm/pmem.h
drivers/acpi/Kconfig
drivers/acpi/nfit.c
drivers/acpi/nfit.h
lib/Kconfig
tools/testing/nvdimm/Kbuild
tools/testing/nvdimm/test/iomap.c
tools/testing/nvdimm/test/nfit.c

index ab6f02fbb6bb6f5aba33cd39ccc6729092cb93fe..e139e1ad82961e729559165296c0a7e5142c96d4 100644 (file)
@@ -28,6 +28,10 @@ config X86
        select ARCH_HAS_FAST_MULTIPLIER
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_PMEM_API
+       select ARCH_HAS_MMIO_FLUSH
+       select ARCH_HAS_SG_CHAIN
+       select ARCH_HAVE_NMI_SAFE_CMPXCHG
+       select ARCH_MIGHT_HAVE_ACPI_PDC         if ACPI
        select ARCH_MIGHT_HAVE_PC_PARPORT
        select ARCH_MIGHT_HAVE_PC_SERIO
        select HAVE_AOUT if X86_32
index de844c6a7525cf80c794a80106e38faba7fad947..2c4f3050e799a2f5e14bcb0063954db6d8772ae3 100644 (file)
@@ -85,6 +85,8 @@ int set_pages_rw(struct page *page, int numpages);
 
 void clflush_cache_range(void *addr, unsigned int size);
 
+#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
+
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void);
 extern const int rodata_test_data;
index 5f769c9fa52eb50b5d67e91e6a4dd350aa3efe54..81942efae8a2fc888e50294d3f6c97d8b9a23e93 100644 (file)
@@ -248,8 +248,6 @@ static inline void flush_write_buffers(void)
 #endif
 }
 
-#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
-
 #endif /* __KERNEL__ */
 
 extern void native_io_delay(void);
index a3a0df6545eef18a05f717d74851c1bc84a516ea..bb026c5adf8a88156015e4ac3eec4bb741f1a765 100644 (file)
@@ -18,6 +18,8 @@
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
+
 #ifdef CONFIG_ARCH_HAS_PMEM_API
 /**
  * arch_memcpy_to_pmem - copy data to persistent memory
index 9c43ae301300e3ec81b16f287d294f40144dcc0c..14089c1e40496f86461a146b3fe709e533d15d19 100644 (file)
@@ -387,6 +387,7 @@ config ACPI_NFIT
        tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
        depends on PHYS_ADDR_T_64BIT
        depends on BLK_DEV
+       depends on ARCH_HAS_MMIO_FLUSH
        select LIBNVDIMM
        help
          Infrastructure to probe ACPI 6 compliant platforms for
index eb14e163c8fa1434191bea155105ebd31fe71013..f499627b8a3c20088d55b4bb34837e00f274cbee 100644 (file)
@@ -1009,7 +1009,7 @@ static void wmb_blk(struct nfit_blk *nfit_blk)
                wmb_pmem();
 }
 
-static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
+static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
 {
        struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
        u64 offset = nfit_blk->stat_offset + mmio->size * bw;
@@ -1017,7 +1017,7 @@ static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
        if (mmio->num_lines)
                offset = to_interleave_offset(offset, mmio);
 
-       return readl(mmio->base + offset);
+       return readq(mmio->addr.base + offset);
 }
 
 static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
@@ -1042,11 +1042,11 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
        if (mmio->num_lines)
                offset = to_interleave_offset(offset, mmio);
 
-       writeq(cmd, mmio->base + offset);
+       writeq(cmd, mmio->addr.base + offset);
        wmb_blk(nfit_blk);
 
        if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH)
-               readq(mmio->base + offset);
+               readq(mmio->addr.base + offset);
 }
 
 static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
@@ -1078,11 +1078,16 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
                }
 
                if (rw)
-                       memcpy_to_pmem(mmio->aperture + offset,
+                       memcpy_to_pmem(mmio->addr.aperture + offset,
                                        iobuf + copied, c);
-               else
+               else {
+                       if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH)
+                               mmio_flush_range((void __force *)
+                                       mmio->addr.aperture + offset, c);
+
                        memcpy_from_pmem(iobuf + copied,
-                                       mmio->aperture + offset, c);
+                                       mmio->addr.aperture + offset, c);
+               }
 
                copied += c;
                len -= c;
@@ -1129,7 +1134,10 @@ static void nfit_spa_mapping_release(struct kref *kref)
 
        WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
        dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
-       iounmap(spa_map->iomem);
+       if (spa_map->type == SPA_MAP_APERTURE)
+               memunmap((void __force *)spa_map->addr.aperture);
+       else
+               iounmap(spa_map->addr.base);
        release_mem_region(spa->address, spa->length);
        list_del(&spa_map->list);
        kfree(spa_map);
@@ -1175,7 +1183,7 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
        spa_map = find_spa_mapping(acpi_desc, spa);
        if (spa_map) {
                kref_get(&spa_map->kref);
-               return spa_map->iomem;
+               return spa_map->addr.base;
        }
 
        spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
@@ -1191,20 +1199,19 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
        if (!res)
                goto err_mem;
 
-       if (type == SPA_MAP_APERTURE) {
-               /*
-                * TODO: memremap_pmem() support, but that requires cache
-                * flushing when the aperture is moved.
-                */
-               spa_map->iomem = ioremap_wc(start, n);
-       } else
-               spa_map->iomem = ioremap_nocache(start, n);
+       spa_map->type = type;
+       if (type == SPA_MAP_APERTURE)
+               spa_map->addr.aperture = (void __pmem *)memremap(start, n,
+                                                       ARCH_MEMREMAP_PMEM);
+       else
+               spa_map->addr.base = ioremap_nocache(start, n);
+
 
-       if (!spa_map->iomem)
+       if (!spa_map->addr.base)
                goto err_map;
 
        list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
-       return spa_map->iomem;
+       return spa_map->addr.base;
 
  err_map:
        release_mem_region(start, n);
@@ -1267,7 +1274,7 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
                nfit_blk->dimm_flags = flags.flags;
        else if (rc == -ENOTTY) {
                /* fall back to a conservative default */
-               nfit_blk->dimm_flags = ND_BLK_DCR_LATCH;
+               nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH;
                rc = 0;
        } else
                rc = -ENXIO;
@@ -1307,9 +1314,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
        /* map block aperture memory */
        nfit_blk->bdw_offset = nfit_mem->bdw->offset;
        mmio = &nfit_blk->mmio[BDW];
-       mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
+       mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
                        SPA_MAP_APERTURE);
-       if (!mmio->base) {
+       if (!mmio->addr.base) {
                dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
                                nvdimm_name(nvdimm));
                return -ENOMEM;
@@ -1330,9 +1337,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
        nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
        nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
        mmio = &nfit_blk->mmio[DCR];
-       mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
+       mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
                        SPA_MAP_CONTROL);
-       if (!mmio->base) {
+       if (!mmio->addr.base) {
                dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
                                nvdimm_name(nvdimm));
                return -ENOMEM;
@@ -1399,7 +1406,7 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
        for (i = 0; i < 2; i++) {
                struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
 
-               if (mmio->base)
+               if (mmio->addr.base)
                        nfit_spa_unmap(acpi_desc, mmio->spa);
        }
        nd_blk_region_set_provider_data(ndbr, NULL);
index f2c2bb751882c3b88c39d546441071cbb495eb6f..7e740156b9c2996986e0283e2bdb5d84512929ee 100644 (file)
@@ -41,6 +41,7 @@ enum nfit_uuids {
 };
 
 enum {
+       ND_BLK_READ_FLUSH = 1,
        ND_BLK_DCR_LATCH = 2,
 };
 
@@ -117,12 +118,16 @@ enum nd_blk_mmio_selector {
        DCR,
 };
 
+struct nd_blk_addr {
+       union {
+               void __iomem *base;
+               void __pmem  *aperture;
+       };
+};
+
 struct nfit_blk {
        struct nfit_blk_mmio {
-               union {
-                       void __iomem *base;
-                       void __pmem  *aperture;
-               };
+               struct nd_blk_addr addr;
                u64 size;
                u64 base_offset;
                u32 line_size;
@@ -149,7 +154,8 @@ struct nfit_spa_mapping {
        struct acpi_nfit_system_address *spa;
        struct list_head list;
        struct kref kref;
-       void __iomem *iomem;
+       enum spa_map_type type;
+       struct nd_blk_addr addr;
 };
 
 static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)
index d212a66b322929f8331a675c8b2a9846c8592f53..62c5b9de61556f14d927af398d2d7b32d47ca8b9 100644 (file)
@@ -529,4 +529,7 @@ config ARCH_HAS_SG_CHAIN
 config ARCH_HAS_PMEM_API
        bool
 
+config ARCH_HAS_MMIO_FLUSH
+       bool
+
 endmenu
index e667579d38a048e757796e729ddbbae03f91d155..98f2881ba6a28813bc4374e5c4a8cec63bd69b14 100644 (file)
@@ -1,8 +1,10 @@
 ldflags-y += --wrap=ioremap_wc
+ldflags-y += --wrap=memremap
 ldflags-y += --wrap=devm_ioremap_nocache
 ldflags-y += --wrap=devm_memremap
 ldflags-y += --wrap=ioremap_nocache
 ldflags-y += --wrap=iounmap
+ldflags-y += --wrap=memunmap
 ldflags-y += --wrap=__devm_request_region
 ldflags-y += --wrap=__request_region
 ldflags-y += --wrap=__release_region
index ff1e0045886409c998c5afb6a6a91d550bd04997..179d2289f3a8289874edccfe6ffcbe8cffd058e4 100644 (file)
@@ -89,12 +89,25 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
        nfit_res = get_nfit_res(offset);
        rcu_read_unlock();
        if (nfit_res)
-               return (void __iomem *) nfit_res->buf + offset
-                       - nfit_res->res->start;
+               return nfit_res->buf + offset - nfit_res->res->start;
        return devm_memremap(dev, offset, size, flags);
 }
 EXPORT_SYMBOL(__wrap_devm_memremap);
 
+void *__wrap_memremap(resource_size_t offset, size_t size,
+               unsigned long flags)
+{
+       struct nfit_test_resource *nfit_res;
+
+       rcu_read_lock();
+       nfit_res = get_nfit_res(offset);
+       rcu_read_unlock();
+       if (nfit_res)
+               return nfit_res->buf + offset - nfit_res->res->start;
+       return memremap(offset, size, flags);
+}
+EXPORT_SYMBOL(__wrap_memremap);
+
 void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
 {
        return __nfit_test_ioremap(offset, size, ioremap_nocache);
@@ -120,6 +133,19 @@ void __wrap_iounmap(volatile void __iomem *addr)
 }
 EXPORT_SYMBOL(__wrap_iounmap);
 
+void __wrap_memunmap(void *addr)
+{
+       struct nfit_test_resource *nfit_res;
+
+       rcu_read_lock();
+       nfit_res = get_nfit_res((unsigned long) addr);
+       rcu_read_unlock();
+       if (nfit_res)
+               return;
+       return memunmap(addr);
+}
+EXPORT_SYMBOL(__wrap_memunmap);
+
 static struct resource *nfit_test_request_region(struct device *dev,
                struct resource *parent, resource_size_t start,
                resource_size_t n, const char *name, int flags)
index 28dba918524e5bc5d02e8f0b2f87fe514b69beaa..021e6f97f33e7af2a7e570ba46cd29b72523b130 100644 (file)
@@ -1029,9 +1029,13 @@ static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
 
        lane = nd_region_acquire_lane(nd_region);
        if (rw)
-               memcpy(mmio->base + dpa, iobuf, len);
-       else
-               memcpy(iobuf, mmio->base + dpa, len);
+               memcpy(mmio->addr.base + dpa, iobuf, len);
+       else {
+               memcpy(iobuf, mmio->addr.base + dpa, len);
+
+               /* give us some coverage of the mmio_flush_range() API */
+               mmio_flush_range(mmio->addr.base + dpa, len);
+       }
        nd_region_release_lane(nd_region, lane);
 
        return 0;