diff mbox

[3/3] pmem: Use the poison list to expose badblocks

Message ID 1450603122-7205-4-git-send-email-vishal@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Vishal Verma Dec. 20, 2015, 9:18 a.m. UTC
From: Vishal Verma <vishal.l.verma@intel.com>

Enable the gendisk badblocks feature for pmem namespaces.
If the pmem namespace being created has any known poison associated with
its physical address space, convert the poison ranges to bad sectors
exposed using the badblocks interface.

Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
 drivers/nvdimm/pmem.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)

Comments

kernel test robot Dec. 20, 2015, 9:31 a.m. UTC | #1
Hi Vishal,

[auto build test ERROR on linux-nvdimm/libnvdimm-for-next]
[also build test ERROR on v4.4-rc5 next-20151218]

url:    https://github.com/0day-ci/linux/commits/vishal-kernel-org/Expose-known-poison-in-SPA-ranges-to-the-block-layer/20151220-172142
base:   https://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm libnvdimm-for-next
config: x86_64-randconfig-x017-201551 (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   drivers/nvdimm/pmem.c: In function 'pmem_add_badblock_range':
>> drivers/nvdimm/pmem.c:201:9: error: implicit declaration of function 'disk_set_badblocks' [-Werror=implicit-function-declaration]
       rc = disk_set_badblocks(disk, s, done);
            ^
   drivers/nvdimm/pmem.c: In function 'pmem_attach_disk':
>> drivers/nvdimm/pmem.c:314:8: error: implicit declaration of function 'disk_alloc_badblocks' [-Werror=implicit-function-declaration]
     ret = disk_alloc_badblocks(disk);
           ^
   cc1: some warnings being treated as errors

vim +/disk_set_badblocks +201 drivers/nvdimm/pmem.c

   195			sector_t s = start_sector;
   196			int rc;
   197	
   198			while (remaining) {
   199				int done = min_t(u64, remaining, INT_MAX);
   200	
 > 201				rc = disk_set_badblocks(disk, s, done);
   202				if (rc)
   203					return rc;
   204				remaining -= done;
   205				s += done;
   206			}
   207			return 0;
   208		} else
   209			return disk_set_badblocks(disk, start_sector, num_sectors);
   210	}
   211	
   212	/*
   213	 * The poison list generated during NFIT initialization may contain multiple,
   214	 * possibly overlapping ranges in the SPA (System Physical Address) space.
   215	 * Compare each of these ranges to the pmem namespace currently being
   216	 * initialized, and add badblocks for the sub-ranges that match
   217	 */
   218	static int pmem_add_poison(struct gendisk *disk, struct nvdimm_bus *nvdimm_bus)
   219	{
   220		struct pmem_device *pmem = disk->private_data;
   221		struct list_head *poison_list;
   222		struct nd_poison *pl;
   223		int rc;
   224	
   225		poison_list = nvdimm_bus_get_poison_list(nvdimm_bus);
   226		if (list_empty(poison_list))
   227			return 0;
   228	
   229		list_for_each_entry(pl, poison_list, list) {
   230			u64 pl_end = pl->start + pl->length - 1;
   231			u64 pmem_end = pmem->phys_addr + pmem->size - 1;
   232	
   233			/* Discard intervals with no intersection */
   234			if (pl_end < pmem->phys_addr)
   235				continue;
   236			if (pl->start > pmem_end)
   237				continue;
   238			/* Deal with any overlap after start of the pmem range */
   239			if (pl->start >= pmem->phys_addr) {
   240				u64 start = pl->start;
   241				u64 len;
   242	
   243				if (pl_end <= pmem_end)
   244					len = pl->length;
   245				else
   246					len = pmem->phys_addr + pmem->size - pl->start;
   247	
   248				rc = pmem_add_badblock_range(disk, start, len);
   249				if (rc)
   250					return rc;
   251				dev_info(&nvdimm_bus->dev,
   252					"Found a poison range (0x%llx, 0x%llx)\n",
   253					start, len);
   254				continue;
   255			}
   256			/* Deal with overlap for poison starting before pmem range */
   257			if (pl->start < pmem->phys_addr) {
   258				u64 start = pmem->phys_addr;
   259				u64 len;
   260	
   261				if (pl_end < pmem_end)
   262					len = pl->start + pl->length - pmem->phys_addr;
   263				else
   264					len = pmem->size;
   265	
   266				rc = pmem_add_badblock_range(disk, start, len);
   267				if (rc)
   268					return rc;
   269				dev_info(&nvdimm_bus->dev,
   270					"Found a poison range (0x%llx, 0x%llx)\n",
   271					start, len);
   272			}
   273		}
   274	
   275		return 0;
   276	}
   277	
   278	static int pmem_attach_disk(struct device *dev,
   279			struct nd_namespace_common *ndns, struct pmem_device *pmem)
   280	{
   281		struct nd_region *nd_region = to_nd_region(dev->parent);
   282		struct nvdimm_bus *nvdimm_bus;
   283		int nid = dev_to_node(dev);
   284		struct gendisk *disk;
   285		int ret;
   286	
   287		pmem->pmem_queue = blk_alloc_queue_node(GFP_KERNEL, nid);
   288		if (!pmem->pmem_queue)
   289			return -ENOMEM;
   290	
   291		blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
   292		blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
   293		blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
   294		blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
   295		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
   296	
   297		disk = alloc_disk_node(0, nid);
   298		if (!disk) {
   299			blk_cleanup_queue(pmem->pmem_queue);
   300			return -ENOMEM;
   301		}
   302	
   303		disk->major		= pmem_major;
   304		disk->first_minor	= 0;
   305		disk->fops		= &pmem_fops;
   306		disk->private_data	= pmem;
   307		disk->queue		= pmem->pmem_queue;
   308		disk->flags		= GENHD_FL_EXT_DEVT;
   309		nvdimm_namespace_disk_name(ndns, disk->disk_name);
   310		disk->driverfs_dev = dev;
   311		set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
   312		pmem->pmem_disk = disk;
   313	
 > 314		ret = disk_alloc_badblocks(disk);
   315		if (ret)
   316			return ret;
   317	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Dan Williams Dec. 21, 2015, 1:20 a.m. UTC | #2
On Sun, Dec 20, 2015 at 1:18 AM,  <vishal@kernel.org> wrote:
> From: Vishal Verma <vishal.l.verma@intel.com>
>
> Enable the gendisk badblocks feature for pmem namespaces.
> If the pmem namespace being created has any known poison associated with
> its physical address space, convert the poison ranges to bad sectors
> exposed using the badblocks interface.
>
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
> ---
>  drivers/nvdimm/pmem.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 124 insertions(+)

I think we should move this new functionality to the core because
there is not much pmem driver specific.  It's all generic nvdimm-core
and block-core functionality.  The only missing information the core
routine needs is the gendisk and a data offset (if sector-zero is at
an offset from the base address range of the namespace).  Something
like:

nvdimm_namespace_disk_poison(struct nd_namespace_common *ndns,
    resource_size_t offset, struct gendisk *disk)
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Verma, Vishal L Dec. 21, 2015, 6:50 p.m. UTC | #3
T24gU3VuLCAyMDE1LTEyLTIwIGF0IDE3OjIwIC0wODAwLCBEYW4gV2lsbGlhbXMgd3JvdGU6DQo+
IE9uIFN1biwgRGVjIDIwLCAyMDE1IGF0IDE6MTggQU0swqDCoDx2aXNoYWxAa2VybmVsLm9yZz4g
d3JvdGU6DQo+ID4gRnJvbTogVmlzaGFsIFZlcm1hIDx2aXNoYWwubC52ZXJtYUBpbnRlbC5jb20+
DQo+ID4gDQo+ID4gRW5hYmxlIHRoZSBnZW5kaXNrIGJhZGJsb2NrcyBmZWF0dXJlIGZvciBwbWVt
IG5hbWVzcGFjZXMuDQo+ID4gSWYgdGhlIHBtZW0gbmFtZXNwYWNlIGJlaW5nIGNyZWF0ZWQgaGFz
IGFueSBrbm93biBwb2lzb24gYXNzb2NpYXRlZA0KPiA+IHdpdGgNCj4gPiBpdHMgcGh5c2ljYWwg
YWRkcmVzcyBzcGFjZSwgY29udmVydCB0aGUgcG9pc29uIHJhbmdlcyB0byBiYWQgc2VjdG9ycw0K
PiA+IGV4cG9zZWQgdXNpbmcgdGhlIGJhZGJsb2NrcyBpbnRlcmZhY2UuDQo+ID4gDQo+ID4gU2ln
bmVkLW9mZi1ieTogVmlzaGFsIFZlcm1hIDx2aXNoYWwubC52ZXJtYUBpbnRlbC5jb20+DQo+ID4g
LS0tDQo+ID4gwqBkcml2ZXJzL252ZGltbS9wbWVtLmMgfCAxMjQNCj4gPiArKysrKysrKysrKysr
KysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKw0KPiA+IMKgMSBmaWxlIGNoYW5n
ZWQsIDEyNCBpbnNlcnRpb25zKCspDQo+IA0KPiBJIHRoaW5rIHdlIHNob3VsZCBtb3ZlIHRoaXMg
bmV3IGZ1bmN0aW9uYWxpdHkgdG8gdGhlIGNvcmUgYmVjYXVzZQ0KPiB0aGVyZSBpcyBub3QgbXVj
aCBwbWVtIGRyaXZlciBzcGVjaWZpYy7CoMKgSXQncyBhbGwgZ2VuZXJpYyBudmRpbW0tY29yZQ0K
PiBhbmQgYmxvY2stY29yZSBmdW5jdGlvbmFsaXR5LsKgwqBUaGUgb25seSBtaXNzaW5nIGluZm9y
bWF0aW9uIHRoZSBjb3JlDQo+IHJvdXRpbmUgbmVlZHMgaXMgdGhlIGdlbmRpc2sgYW5kIGEgZGF0
YSBvZmZzZXQgKGlmIHNlY3Rvci16ZXJvIGlzIGF0DQo+IGFuIG9mZnNldCBmcm9tIHRoZSBiYXNl
IGFkZHJlc3MgcmFuZ2Ugb2YgdGhlIG5hbWVzcGFjZSkuwqDCoFNvbWV0aGluZw0KPiBsaWtlOg0K
PiANCj4gbnZkaW1tX25hbWVzcGFjZV9kaXNrX3BvaXNvbihzdHJ1Y3QgbmRfbmFtZXNwYWNlX2Nv
bW1vbiAqbmRucywNCj4gwqDCoMKgwqByZXNvdXJjZV9zaXplX3Qgb2Zmc2V0LCBzdHJ1Y3QgZ2Vu
ZGlzayAqZGlzaykNCg0KVGhpcyBzaG91bGQgYmUgZWFzeSB0byBkbywgaG93ZXZlciBpc24ndCBp
dCBhIGJpdCBjb3VudGVyLWludHVpdGl2ZSB0bw0KbW92ZSB0aGlzIGludG8gY29yZT8gVGhlIGxv
d2VyIGxldmVsIGRyaXZlcnMgcG1lbS9ibGsvYnR0IGFyZSBhbGwgb3duZXJzDQpvZiB0aGVpciBy
ZXNwZWN0aXZlIGdlbmRpc2tzLCBhbmQgc28gZG9lc24ndCBpdCBtYWtlIG1vcmUgc2Vuc2UgZm9y
IHRoZW0NCnRvIGJlIGluIGNvbnRyb2wgb2YgbWFuaXB1bGF0aW5nIHRoZWlyIGdlbmRpc2sgZGF0
YS4gQWxzbywgSSBjYW4gc2VlDQptb3ZpbmcgdGhlbSBpZiB0aGlzIHdhcyBhIGNvbW1vbiBvcGVy
YXRpb24sIGJ1dCBvbmx5IHBtZW0gd2lsbCBldmVyIG5lZWQNCnRvIGRvIHRoaXMuLg0KDQpJJ20g
bm90IHRvbyBzdHJvbmdseSBvcHBvc2VkIHRvIHRoaXMgaG93ZXZlciAtIHRoZSBvbmUgdGhpbmcg
dGhhdCBkaWQNCmZlZWwgYSBiaXQgYXdrd2FyZCBiZWluZyBpbiBwbWVtIHdhcyB0aGF0IHdlIGFz
ayBjb3JlIGZvciBhIHN0cnVjdA0KbGlzdF9oZWFkIGFuZCB0aGVuIHdhbGsgaXQgb3Vyc2VsdmVz
IC0gcG1lbSBkb2Vzbid0IG5vcm1hbGx5IGtub3cgYWJvdXQNCnRoZSBpbnRlcm5hbHMgb2YgbnZk
aW1tX2J1cywgYnV0IHdpdGggdGhpcyB3ZSBpbXBsaWNpdGx5IG1ha2UgaXQgYXdhcmUu
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dan Williams Dec. 21, 2015, 7:10 p.m. UTC | #4
On Mon, Dec 21, 2015 at 10:50 AM, Verma, Vishal L
<vishal.l.verma@intel.com> wrote:
> On Sun, 2015-12-20 at 17:20 -0800, Dan Williams wrote:
>> On Sun, Dec 20, 2015 at 1:18 AM,  <vishal@kernel.org> wrote:
>> > From: Vishal Verma <vishal.l.verma@intel.com>
>> >
>> > Enable the gendisk badblocks feature for pmem namespaces.
>> > If the pmem namespace being created has any known poison associated
>> > with
>> > its physical address space, convert the poison ranges to bad sectors
>> > exposed using the badblocks interface.
>> >
>> > Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
>> > ---
>> >  drivers/nvdimm/pmem.c | 124
>> > ++++++++++++++++++++++++++++++++++++++++++++++++++
>> >  1 file changed, 124 insertions(+)
>>
>> I think we should move this new functionality to the core because
>> there is not much pmem driver specific.  It's all generic nvdimm-core
>> and block-core functionality.  The only missing information the core
>> routine needs is the gendisk and a data offset (if sector-zero is at
>> an offset from the base address range of the namespace).  Something
>> like:
>>
>> nvdimm_namespace_disk_poison(struct nd_namespace_common *ndns,
>>     resource_size_t offset, struct gendisk *disk)
>
> This should be easy to do, however isn't it a bit counter-intuitive to
> move this into core? The lower level drivers pmem/blk/btt are all owners
> of their respective gendisks, and so doesn't it make more sense for them
> to be in control of manipulating their gendisk data. Also, I can see
> moving them if this was a common operation, but only pmem will ever need
> to do this..
>
> I'm not too strongly opposed to this however - the one thing that did
> feel a bit awkward being in pmem was that we ask core for a struct
> list_head and then walk it ourselves - pmem doesn't normally know about
> the internals of nvdimm_bus, but with this we implicitly make it aware.

Yeah, it's borderline, but teaching pmem how to walk a list that the
core built up is what triggered the suggestion.  It's true that the
pmem driver owns its gendisk, but that's why it gets to make the
decision to call the library helper or not.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dan Williams Dec. 23, 2015, 8:28 p.m. UTC | #5
On Sun, Dec 20, 2015 at 1:18 AM,  <vishal@kernel.org> wrote:
> From: Vishal Verma <vishal.l.verma@intel.com>
>
> Enable the gendisk badblocks feature for pmem namespaces.
> If the pmem namespace being created has any known poison associated with
> its physical address space, convert the poison ranges to bad sectors
> exposed using the badblocks interface.
>
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
> ---
>  drivers/nvdimm/pmem.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 124 insertions(+)
>
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index 8ee7989..462570f 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
[..]
> @@ -196,6 +311,15 @@ static int pmem_attach_disk(struct device *dev,
>         set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
>         pmem->pmem_disk = disk;
>
> +       ret = disk_alloc_badblocks(disk);
> +       if (ret)
> +               return ret;

I think we should skip allocating a bad block list in the case we find
no poison.  Then we can do a simple "if (disk->bb)" to check if there
are any bad blocks.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Verma, Vishal L Dec. 23, 2015, 8:32 p.m. UTC | #6
T24gV2VkLCAyMDE1LTEyLTIzIGF0IDEyOjI4IC0wODAwLCBEYW4gV2lsbGlhbXMgd3JvdGU6DQo+
IE9uIFN1biwgRGVjIDIwLCAyMDE1IGF0IDE6MTggQU0swqDCoDx2aXNoYWxAa2VybmVsLm9yZz4g
d3JvdGU6DQo+ID4gRnJvbTogVmlzaGFsIFZlcm1hIDx2aXNoYWwubC52ZXJtYUBpbnRlbC5jb20+
DQo+ID4gDQo+ID4gRW5hYmxlIHRoZSBnZW5kaXNrIGJhZGJsb2NrcyBmZWF0dXJlIGZvciBwbWVt
IG5hbWVzcGFjZXMuDQo+ID4gSWYgdGhlIHBtZW0gbmFtZXNwYWNlIGJlaW5nIGNyZWF0ZWQgaGFz
IGFueSBrbm93biBwb2lzb24gYXNzb2NpYXRlZA0KPiA+IHdpdGgNCj4gPiBpdHMgcGh5c2ljYWwg
YWRkcmVzcyBzcGFjZSwgY29udmVydCB0aGUgcG9pc29uIHJhbmdlcyB0byBiYWQgc2VjdG9ycw0K
PiA+IGV4cG9zZWQgdXNpbmcgdGhlIGJhZGJsb2NrcyBpbnRlcmZhY2UuDQo+ID4gDQo+ID4gU2ln
bmVkLW9mZi1ieTogVmlzaGFsIFZlcm1hIDx2aXNoYWwubC52ZXJtYUBpbnRlbC5jb20+DQo+ID4g
LS0tDQo+ID4gwqBkcml2ZXJzL252ZGltbS9wbWVtLmMgfCAxMjQNCj4gPiArKysrKysrKysrKysr
KysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKw0KPiA+IMKgMSBmaWxlIGNoYW5n
ZWQsIDEyNCBpbnNlcnRpb25zKCspDQo+ID4gDQo+ID4gZGlmZiAtLWdpdCBhL2RyaXZlcnMvbnZk
aW1tL3BtZW0uYyBiL2RyaXZlcnMvbnZkaW1tL3BtZW0uYw0KPiA+IGluZGV4IDhlZTc5ODkuLjQ2
MjU3MGYgMTAwNjQ0DQo+ID4gLS0tIGEvZHJpdmVycy9udmRpbW0vcG1lbS5jDQo+ID4gKysrIGIv
ZHJpdmVycy9udmRpbW0vcG1lbS5jDQo+IFsuLl0NCj4gPiBAQCAtMTk2LDYgKzMxMSwxNSBAQCBz
dGF0aWMgaW50IHBtZW1fYXR0YWNoX2Rpc2soc3RydWN0IGRldmljZSAqZGV2LA0KPiA+IMKgwqDC
oMKgwqDCoMKgwqBzZXRfY2FwYWNpdHkoZGlzaywgKHBtZW0tPnNpemUgLSBwbWVtLT5kYXRhX29m
ZnNldCkgLyA1MTIpOw0KPiA+IMKgwqDCoMKgwqDCoMKgwqBwbWVtLT5wbWVtX2Rpc2sgPSBkaXNr
Ow0KPiA+IA0KPiA+ICvCoMKgwqDCoMKgwqDCoHJldCA9IGRpc2tfYWxsb2NfYmFkYmxvY2tzKGRp
c2spOw0KPiA+ICvCoMKgwqDCoMKgwqDCoGlmIChyZXQpDQo+ID4gK8KgwqDCoMKgwqDCoMKgwqDC
oMKgwqDCoMKgwqDCoHJldHVybiByZXQ7DQo+IA0KPiBJIHRoaW5rIHdlIHNob3VsZCBza2lwIGFs
bG9jYXRpbmcgYSBiYWQgYmxvY2sgbGlzdCBpbiB0aGUgY2FzZSB3ZSBmaW5kDQo+IG5vIHBvaXNv
bi7CoMKgVGhlbiB3ZSBjYW4gZG8gYSBzaW1wbGUgImlmIChkaXNrLT5iYikiIHRvIGNoZWNrIGlm
IHRoZXJlDQo+IGFyZSBhbnkgYmFkIGJsb2Nrcy4NCg0KU291bmRzIGdvb2QgLSBJJ2xsIG1vdmUg
dGhpcyBhbGxvY2F0aW9uIGludG8gdGhlICh3aGF0IHdpbGwgYmUgYSBjb3JlKQ0Kcm91dGluZSB0
aGF0IHRyYXZlcnNlcyB0aGUgbGlzdCwgYW5kIG1ha2UgaXQgc3VjaCB0aGF0IHdlIGFsbG9jYXRl
DQpkaXNrLT5iYiBpZmYgYSBwb2lzb24gZW50cnkgaXMgZm91bmQgZm9yIF90aGlzXyBwbWVtIHJh
bmdlLg0KDQpJdCB3aWxsIG1lYW4gYSBjaGVjayBhdCBldmVyeSBsaXN0IG5vZGUsIGJ1dCB0aGF0
IHNob3VsZG4ndCBtYXR0ZXIgYXMNCnRoaXMgaXMganVzdCBpbml0LXRpbWUgb3ZlcmhlYWQuDQoN
CgktVmlzaGFs
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dan Williams Dec. 23, 2015, 8:38 p.m. UTC | #7
On Wed, Dec 23, 2015 at 12:32 PM, Verma, Vishal L
<vishal.l.verma@intel.com> wrote:
> On Wed, 2015-12-23 at 12:28 -0800, Dan Williams wrote:
>> On Sun, Dec 20, 2015 at 1:18 AM,  <vishal@kernel.org> wrote:
>> > From: Vishal Verma <vishal.l.verma@intel.com>
>> >
>> > Enable the gendisk badblocks feature for pmem namespaces.
>> > If the pmem namespace being created has any known poison associated
>> > with
>> > its physical address space, convert the poison ranges to bad sectors
>> > exposed using the badblocks interface.
>> >
>> > Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
>> > ---
>> >  drivers/nvdimm/pmem.c | 124
>> > ++++++++++++++++++++++++++++++++++++++++++++++++++
>> >  1 file changed, 124 insertions(+)
>> >
>> > diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
>> > index 8ee7989..462570f 100644
>> > --- a/drivers/nvdimm/pmem.c
>> > +++ b/drivers/nvdimm/pmem.c
>> [..]
>> > @@ -196,6 +311,15 @@ static int pmem_attach_disk(struct device *dev,
>> >         set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
>> >         pmem->pmem_disk = disk;
>> >
>> > +       ret = disk_alloc_badblocks(disk);
>> > +       if (ret)
>> > +               return ret;
>>
>> I think we should skip allocating a bad block list in the case we find
>> no poison.  Then we can do a simple "if (disk->bb)" to check if there
>> are any bad blocks.
>
> Sounds good - I'll move this allocation into the (what will be a core)
> routine that traverses the list, and make it such that we allocate
> disk->bb iff a poison entry is found for _this_ pmem range.
>
> It will mean a check at every list node, but that shouldn't matter as
> this is just init-time overhead.
>

...or free it on "no poison found".
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8ee7989..462570f 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -19,14 +19,18 @@ 
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 #include <linux/init.h>
+#include <linux/list.h>
 #include <linux/platform_device.h>
 #include <linux/module.h>
 #include <linux/memory_hotplug.h>
 #include <linux/moduleparam.h>
+#include <linux/libnvdimm.h>
 #include <linux/vmalloc.h>
+#include <linux/blkdev.h>
 #include <linux/slab.h>
 #include <linux/pmem.h>
 #include <linux/nd.h>
+#include "nd-core.h"
 #include "pfn.h"
 #include "nd.h"
 
@@ -163,11 +167,122 @@  static void pmem_detach_disk(struct pmem_device *pmem)
 	blk_cleanup_queue(pmem->pmem_queue);
 }
 
+/**
+ * pmem_add_badblock_range() - Convert a physical address range to bad sectors
+ * @disk:	the disk associated with the pmem namespace
+ * @start:	start of the physical address range
+ * @length:	number of bytes of poison to be added
+ *
+ * This assumes that the range provided with (start, length) is already within
+ * the bounds of physical addresses for this namespace, i.e. lies in the
+ * interval [pmem->phys_addr, pmem->phys_addr + pmem->size)
+ */
+static int pmem_add_badblock_range(struct gendisk *disk, u64 start, u64 length)
+{
+	unsigned int sector_size = queue_logical_block_size(disk->queue);
+	struct pmem_device *pmem = disk->private_data;
+	sector_t start_sector;
+	u64 num_sectors;
+	u32 rem;
+
+	start_sector = div_u64(start - pmem->phys_addr, sector_size);
+	num_sectors = div_u64_rem(length, sector_size, &rem);
+	if (rem)
+		num_sectors++;
+
+	if (unlikely(num_sectors > (u64)INT_MAX)) {
+		u64 remaining = num_sectors;
+		sector_t s = start_sector;
+		int rc;
+
+		while (remaining) {
+			int done = min_t(u64, remaining, INT_MAX);
+
+			rc = disk_set_badblocks(disk, s, done);
+			if (rc)
+				return rc;
+			remaining -= done;
+			s += done;
+		}
+		return 0;
+	} else
+		return disk_set_badblocks(disk, start_sector, num_sectors);
+}
+
+/*
+ * The poison list generated during NFIT initialization may contain multiple,
+ * possibly overlapping ranges in the SPA (System Physical Address) space.
+ * Compare each of these ranges to the pmem namespace currently being
+ * initialized, and add badblocks for the sub-ranges that match
+ */
+static int pmem_add_poison(struct gendisk *disk, struct nvdimm_bus *nvdimm_bus)
+{
+	struct pmem_device *pmem = disk->private_data;
+	struct list_head *poison_list;
+	struct nd_poison *pl;
+	int rc;
+
+	poison_list = nvdimm_bus_get_poison_list(nvdimm_bus);
+	if (list_empty(poison_list))
+		return 0;
+
+	list_for_each_entry(pl, poison_list, list) {
+		u64 pl_end = pl->start + pl->length - 1;
+		u64 pmem_end = pmem->phys_addr + pmem->size - 1;
+
+		/* Discard intervals with no intersection */
+		if (pl_end < pmem->phys_addr)
+			continue;
+		if (pl->start > pmem_end)
+			continue;
+		/* Deal with any overlap after start of the pmem range */
+		if (pl->start >= pmem->phys_addr) {
+			u64 start = pl->start;
+			u64 len;
+
+			if (pl_end <= pmem_end)
+				len = pl->length;
+			else
+				len = pmem->phys_addr + pmem->size - pl->start;
+
+			rc = pmem_add_badblock_range(disk, start, len);
+			if (rc)
+				return rc;
+			dev_info(&nvdimm_bus->dev,
+				"Found a poison range (0x%llx, 0x%llx)\n",
+				start, len);
+			continue;
+		}
+		/* Deal with overlap for poison starting before pmem range */
+		if (pl->start < pmem->phys_addr) {
+			u64 start = pmem->phys_addr;
+			u64 len;
+
+			if (pl_end < pmem_end)
+				len = pl->start + pl->length - pmem->phys_addr;
+			else
+				len = pmem->size;
+
+			rc = pmem_add_badblock_range(disk, start, len);
+			if (rc)
+				return rc;
+			dev_info(&nvdimm_bus->dev,
+				"Found a poison range (0x%llx, 0x%llx)\n",
+				start, len);
+		}
+	}
+
+	return 0;
+}
+
 static int pmem_attach_disk(struct device *dev,
 		struct nd_namespace_common *ndns, struct pmem_device *pmem)
 {
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	struct nvdimm_bus *nvdimm_bus;
 	int nid = dev_to_node(dev);
 	struct gendisk *disk;
+	int ret;
 
 	pmem->pmem_queue = blk_alloc_queue_node(GFP_KERNEL, nid);
 	if (!pmem->pmem_queue)
@@ -196,6 +311,15 @@  static int pmem_attach_disk(struct device *dev,
 	set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
 	pmem->pmem_disk = disk;
 
+	ret = disk_alloc_badblocks(disk);
+	if (ret)
+		return ret;
+
+	nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent);
+	ret = pmem_add_poison(disk, nvdimm_bus);
+	if (ret)
+		return ret;
+
 	add_disk(disk);
 	revalidate_disk(disk);