From 1ee6667cd8d183b2fed12f97285f184431d2caf9 Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Thu, 23 Jun 2016 17:50:39 -0700
Subject: libnvdimm, pfn, dax: fix initialization vs autodetect for mode +
 alignment

The updated ndctl unit tests discovered that if a pfn configuration with
a 4K alignment is read from the namespace, that alignment will be
ignored in favor of the default 2M alignment.  The result is that the
configuration will fail initialization with a message like:

    dax6.1: bad offset: 0x22000 dax disabled align: 0x200000

Fix this by allowing the alignment read from the info block to override
the default which is 2M not 0 in the autodetect path.  This also fixes a
similar problem with the mode and alignment settings silently being
overwritten by the kernel when userspace has changed it.  We now will
either overwrite the info block if userspace changes the uuid or fail
and warn if a live setting disagrees with the info block.

Cc: <stable@vger.kernel.org>
Cc: Micah Parrish <micah.parrish@hpe.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/pfn_devs.c | 51 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index f7718ec685fa..cea8350fbc7e 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -344,6 +344,8 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
 	u64 checksum, offset;
+	unsigned long align;
+	enum nd_pfn_mode mode;
 	struct nd_namespace_io *nsio;
 	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 	struct nd_namespace_common *ndns = nd_pfn->ndns;
@@ -386,22 +388,50 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 		return -ENXIO;
 	}
 
+	align = le32_to_cpu(pfn_sb->align);
+	offset = le64_to_cpu(pfn_sb->dataoff);
+	if (align == 0)
+		align = 1UL << ilog2(offset);
+	mode = le32_to_cpu(pfn_sb->mode);
+
 	if (!nd_pfn->uuid) {
-		/* from probe we allocate */
+		/*
+		 * When probing a namepace via nd_pfn_probe() the uuid
+		 * is NULL (see: nd_pfn_devinit()) we init settings from
+		 * pfn_sb
+		 */
 		nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL);
 		if (!nd_pfn->uuid)
 			return -ENOMEM;
+		nd_pfn->align = align;
+		nd_pfn->mode = mode;
 	} else {
-		/* from init we validate */
+		/*
+		 * When probing a pfn / dax instance we validate the
+		 * live settings against the pfn_sb
+		 */
 		if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0)
 			return -ENODEV;
+
+		/*
+		 * If the uuid validates, but other settings mismatch
+		 * return EINVAL because userspace has managed to change
+		 * the configuration without specifying new
+		 * identification.
+		 */
+		if (nd_pfn->align != align || nd_pfn->mode != mode) {
+			dev_err(&nd_pfn->dev,
+					"init failed, settings mismatch\n");
+			dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n",
+					nd_pfn->align, align, nd_pfn->mode,
+					mode);
+			return -EINVAL;
+		}
 	}
 
-	if (nd_pfn->align == 0)
-		nd_pfn->align = le32_to_cpu(pfn_sb->align);
-	if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) {
+	if (align > nvdimm_namespace_capacity(ndns)) {
 		dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
-				nd_pfn->align, nvdimm_namespace_capacity(ndns));
+				align, nvdimm_namespace_capacity(ndns));
 		return -EINVAL;
 	}
 
@@ -411,7 +441,6 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 	 * namespace has changed since the pfn superblock was
 	 * established.
 	 */
-	offset = le64_to_cpu(pfn_sb->dataoff);
 	nsio = to_nd_namespace_io(&ndns->dev);
 	if (offset >= resource_size(&nsio->res)) {
 		dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
@@ -419,10 +448,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 		return -EBUSY;
 	}
 
-	if ((nd_pfn->align && !IS_ALIGNED(offset, nd_pfn->align))
+	if ((align && !IS_ALIGNED(offset, align))
 			|| !IS_ALIGNED(offset, PAGE_SIZE)) {
-		dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n",
-				offset);
+		dev_err(&nd_pfn->dev,
+				"bad offset: %#llx dax disabled align: %#lx\n",
+				offset, align);
 		return -ENXIO;
 	}
 
@@ -502,7 +532,6 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
 	res->start += start_pad;
 	res->end -= end_trunc;
 
-	nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
 	if (nd_pfn->mode == PFN_MODE_RAM) {
 		if (offset < SZ_8K)
 			return ERR_PTR(-EINVAL);
-- 
cgit v1.2.3-55-g7522


From 4995734e973a2c2e9c6f6413cbad9913fc4df0dc Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Fri, 24 Jun 2016 09:07:39 -0700
Subject: acpi, nfit: fix acpi_check_dsm() vs zero functions implemented

QEMU 2.6 implements nascent support for nvdimm DSMs. Depending on
configuration it may only implement the function0 dsm to indicate that
no other DSMs are available. Commit 31eca76ba2fc "nfit, libnvdimm:
limited/whitelisted dimm command marshaling mechanism" breaks QEMU, but
QEMU is spec compliant.  Per the spec the way to indicate that no
functions are supported is:

    If Function Index is zero, the return is a buffer containing one bit
    for each function index, starting with zero. Bit 0 indicates whether
    there is support for any functions other than function 0 for the
    specified UUID and Revision ID. If set to zero, no functions are
    supported (other than function zero) for the specified UUID and
    Revision ID.

Update the nfit driver to determine the family (interface UUID) without
requiring the implementation to define any other functions, i.e.
short-circuit acpi_check_dsm() to succeed per the spec.  The nfit driver
appears to be the only user passing funcs==0 to acpi_check_dsm(), so
this behavior change of the common routine should be limited to the
probing done by the nfit driver.

Cc: Len Brown <lenb@kernel.org>
Cc: Jerry Hoemann <jerry.hoemann@hpe.com>
Acked-by: "Rafael J. Wysocki" <rafael@kernel.org>
Fixes: 31eca76ba2fc ("nfit, libnvdimm: limited/whitelisted dimm command marshaling mechanism")
Reported-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Tested-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit.c  | 6 +++---
 drivers/acpi/utils.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index 2215fc847fa9..32579a7b71d5 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -1131,11 +1131,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 
 	/*
 	 * Until standardization materializes we need to consider up to 3
-	 * different command sets.  Note, that checking for function0 (bit0)
-	 * tells us if any commands are reachable through this uuid.
+	 * different command sets.  Note, that checking for zero functions
+	 * tells us if any commands might be reachable through this uuid.
 	 */
 	for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++)
-		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
+		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 0))
 			break;
 
 	/* limit the supported commands to those that are publicly documented */
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 22c09952e177..b4de130f2d57 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -680,9 +680,6 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 	u64 mask = 0;
 	union acpi_object *obj;
 
-	if (funcs == 0)
-		return false;
-
 	obj = acpi_evaluate_dsm(handle, uuid, rev, 0, NULL);
 	if (!obj)
 		return false;
@@ -695,6 +692,9 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 			mask |= (((u64)obj->buffer.pointer[i]) << (i * 8));
 	ACPI_FREE(obj);
 
+	if (funcs == 0)
+		return true;
+
 	/*
 	 * Bit 0 indicates whether there's support for any functions other than
 	 * function 0 for the specified UUID and revision.
-- 
cgit v1.2.3-55-g7522


From 023954351fae0e34ba247cff4d798c98290b20a4 Mon Sep 17 00:00:00 2001
From: Eric Sandeen
Date: Thu, 23 Jun 2016 16:54:46 -0500
Subject: dax: fix offset overflow in dax_io

This isn't functionally apparent for some reason, but
when we test io at extreme offsets at the end of the loff_t
rang, such as in fstests xfs/071, the calculation of
"max" in dax_io() can be wrong due to pos + size overflowing.

For example,

# xfs_io -c "pwrite 9223372036854771712 512" /mnt/test/file

enters dax_io with:

start 0x7ffffffffffff000
end   0x7ffffffffffff200

and the rounded up "size" variable is 0x1000.  This yields:

pos + size 0x8000000000000000 (overflows loff_t)
       end 0x7ffffffffffff200

Due to the overflow, the min() function picks the wrong
value for the "max" variable, and when we send (max - pos)
into i.e. copy_from_iter_pmem() it is also the wrong value.

This somehow(tm) gets magically absorbed without incident,
probably because iter->count is correct.  But it seems best
to fix it up properly by comparing the two values as
unsigned.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/dax.c b/fs/dax.c
index 761495bf5eb9..e207f8f9b700 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -208,7 +208,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 				dax.addr += first;
 				size = map_len - first;
 			}
-			max = min(pos + size, end);
+			/*
+			 * pos + size is one past the last offset for IO,
+			 * so pos + size can overflow loff_t at extreme offsets.
+			 * Cast to u64 to catch this and get the true minimum.
+			 */
+			max = min_t(u64, pos + size, end);
 		}
 
 		if (iov_iter_rw(iter) == WRITE) {
-- 
cgit v1.2.3-55-g7522


From 1bcbf42d2732b3fdaa8559b0dfc91567769e23c8 Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Wed, 29 Jun 2016 11:19:32 -0700
Subject: nfit: fix format interface code byte order

Per JEDEC Annex L Release 3 the SPD data is:

Bits 9~5 00 000 = Function Undefined
         00 001 = Byte addressable energy backed
         00 010 = Block addressed
         00 011 = Byte addressable, no energy backed
         All other codes reserved
Bits 4~0 0 0000 = Proprietary interface
         0 0001 = Standard interface 1
         All other codes reserved; see Definitions of Functions

...and per the ACPI 6.1 spec:

    byte0: Bits 4~0 (0 or 1)
    byte1: Bits 9~5 (1, 2, or 3)

...so a format interface code displayed as 0x301 should be stored in the
nfit as (0x1, 0x3), little-endian.

Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Robert Elliott <elliott@hpe.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=121161
Fixes: 30ec5fd464d5 ("nfit: fix format interface code byte order per ACPI6.1")
Fixes: 5ad9a7fde07a ("acpi/nfit: Update nfit driver to comply with ACPI 6.1")
Reported-by: Kristin Jacque <kristin.jacque@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit.c |  6 +++---
 drivers/acpi/nfit.h | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index 32579a7b71d5..ac6ddcc080d4 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -928,7 +928,7 @@ static ssize_t format_show(struct device *dev,
 {
 	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
 
-	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->code));
+	return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code));
 }
 static DEVICE_ATTR_RO(format);
 
@@ -961,8 +961,8 @@ static ssize_t format1_show(struct device *dev,
 				continue;
 			if (nfit_dcr->dcr->code == dcr->code)
 				continue;
-			rc = sprintf(buf, "%#x\n",
-					be16_to_cpu(nfit_dcr->dcr->code));
+			rc = sprintf(buf, "0x%04x\n",
+					le16_to_cpu(nfit_dcr->dcr->code));
 			break;
 		}
 		if (rc != ENXIO)
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
index 11cb38348aef..02b9ea1e8d2e 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit.h
@@ -53,12 +53,12 @@ enum nfit_uuids {
 };
 
 /*
- * Region format interface codes are stored as an array of bytes in the
- * NFIT DIMM Control Region structure
+ * Region format interface codes are stored with the interface as the
+ * LSB and the function as the MSB.
  */
-#define NFIT_FIC_BYTE cpu_to_be16(0x101) /* byte-addressable energy backed */
-#define NFIT_FIC_BLK cpu_to_be16(0x201) /* block-addressable non-energy backed */
-#define NFIT_FIC_BYTEN cpu_to_be16(0x301) /* byte-addressable non-energy backed */
+#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */
+#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */
+#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */
 
 enum {
 	NFIT_BLK_READ_FLUSH = 1,
-- 
cgit v1.2.3-55-g7522