Message ID | 1463509797-10324-2-git-send-email-jonathan.derrick@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, May 17, 2016 at 11:29 AM, Jon Derrick <jonathan.derrick@intel.com> wrote: > This patch fixes S_DAX bd_inode i_flag locking to conform to suggested A "fix" implies that its currently broken. I don't see how it is, not until we add an ioctl method or other path that also tries to update the flags outside of blkdev_get() context. So, I don't think this patch stands on its own if you were intending it to be merged separately. > locking rules. It is presumed that S_DAX is the only valid inode flag > for a block device which subscribes to direct-access, and will restore > any previously set flags if direct-access initialization fails. > > This reverts to i_flags behavior prior to > bbab37ddc20bae4709bca8745c128c4f46fe63c5 > by allowing other bd_inode flags when DAX is disabled > > Signed-off-by: Jon Derrick <jonathan.derrick@intel.com> > --- > fs/block_dev.c | 31 ++++++++++++++++++++++++++----- > 1 file changed, 26 insertions(+), 5 deletions(-) > > diff --git a/fs/block_dev.c b/fs/block_dev.c > index 20a2c02..d41e37f 100644 > --- a/fs/block_dev.c > +++ b/fs/block_dev.c > @@ -1159,6 +1159,20 @@ void bd_set_size(struct block_device *bdev, loff_t size) > } > EXPORT_SYMBOL(bd_set_size); > > +static void bd_add_dax(struct inode *inode) > +{ > + inode_lock(inode); > + inode->i_flags |= S_DAX; > + inode_unlock(inode); > +} > + > +static void bd_clear_dax(struct inode *inode) > +{ > + inode_lock(inode); > + inode->i_flags &= ~S_DAX; > + inode_unlock(inode); > +} Since this is inode generic should these helpers be prefixed "i_" rather than "bd_"? > + > static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); > > /* > @@ -1172,6 +1186,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > { > struct gendisk *disk; > struct module *owner; > + struct inode *inode; > int ret; > int partno; > int perm = 0; > @@ -1198,6 +1213,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > if (!disk) > goto out; > owner = disk->fops->owner; > + inode = bdev->bd_inode; > > disk_block_events(disk); > mutex_lock_nested(&bdev->bd_mutex, for_part); > @@ -1206,9 +1222,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > bdev->bd_queue = disk->queue; > bdev->bd_contains = bdev; > if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access) > - bdev->bd_inode->i_flags = S_DAX; > - else > - bdev->bd_inode->i_flags = 0; > + bd_add_dax(inode); > > if (!partno) { > ret = -ENXIO; > @@ -1228,6 +1242,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > bdev->bd_part = NULL; > bdev->bd_disk = NULL; > bdev->bd_queue = NULL; > + bd_clear_dax(inode); > mutex_unlock(&bdev->bd_mutex); > disk_unblock_events(disk); > put_disk(disk); > @@ -1239,7 +1254,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > if (!ret) { > bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); > if (!blkdev_dax_capable(bdev)) > - bdev->bd_inode->i_flags &= ~S_DAX; > + bd_clear_dax(inode); > } > > /* > @@ -1276,7 +1291,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > } > bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); > if (!blkdev_dax_capable(bdev)) > - bdev->bd_inode->i_flags &= ~S_DAX; > + bd_clear_dax(inode); > } > } else { > if (bdev->bd_contains == bdev) { > @@ -1297,6 +1312,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > put_disk(disk); > module_put(owner); > } > + inode_lock(inode); > + if (inode->i_flags & S_DAX) > + inode->i_flags = S_DAX; > + inode_unlock(inode); > + Clear all other flags if S_DAX is set? Why? > bdev->bd_openers++; > if (for_part) > bdev->bd_part_count++; > @@ -1309,6 +1329,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > bdev->bd_disk = NULL; > bdev->bd_part = NULL; > bdev->bd_queue = NULL; > + bd_clear_dax(inode); Why? -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Dan, Thanks for the review, On Tue, May 17, 2016 at 12:34:57PM -0700, Dan Williams wrote: > On Tue, May 17, 2016 at 11:29 AM, Jon Derrick > <jonathan.derrick@intel.com> wrote: > > This patch fixes S_DAX bd_inode i_flag locking to conform to suggested > > A "fix" implies that its currently broken. I don't see how it is, not > until we add an ioctl method or other path that also tries to update > the flags outside of blkdev_get() context. So, I don't think this > patch stands on its own if you were intending it to be merged > separately. Are there no other paths that can set/clear a bd_inode->i_flags? If not, that's fine and I'll pack this into the next HIPRI set > > > locking rules. It is presumed that S_DAX is the only valid inode flag > > for a block device which subscribes to direct-access, and will restore > > any previously set flags if direct-access initialization fails. > > > > This reverts to i_flags behavior prior to > > bbab37ddc20bae4709bca8745c128c4f46fe63c5 > > by allowing other bd_inode flags when DAX is disabled > > > > Signed-off-by: Jon Derrick <jonathan.derrick@intel.com> > > --- > > fs/block_dev.c | 31 ++++++++++++++++++++++++++----- > > 1 file changed, 26 insertions(+), 5 deletions(-) > > > > diff --git a/fs/block_dev.c b/fs/block_dev.c > > index 20a2c02..d41e37f 100644 > > --- a/fs/block_dev.c > > +++ b/fs/block_dev.c > > @@ -1159,6 +1159,20 @@ void bd_set_size(struct block_device *bdev, loff_t size) > > } > > EXPORT_SYMBOL(bd_set_size); > > > > +static void bd_add_dax(struct inode *inode) > > +{ > > + inode_lock(inode); > > + inode->i_flags |= S_DAX; > > + inode_unlock(inode); > > +} > > + > > +static void bd_clear_dax(struct inode *inode) > > +{ > > + inode_lock(inode); > > + inode->i_flags &= ~S_DAX; > > + inode_unlock(inode); > > +} > > Since this is inode generic should these helpers be prefixed "i_" > rather than "bd_"? That's fine. bd_acquire/forget also use the inode in its parameter list, but its simple enough to use the bdev instead for these functions or i_ as you suggest > > > + > > static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); > > > > /* > > @@ -1172,6 +1186,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > > { > > struct gendisk *disk; > > struct module *owner; > > + struct inode *inode; > > int ret; > > int partno; > > int perm = 0; > > @@ -1198,6 +1213,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > > if (!disk) > > goto out; > > owner = disk->fops->owner; > > + inode = bdev->bd_inode; > > > > disk_block_events(disk); > > mutex_lock_nested(&bdev->bd_mutex, for_part); > > @@ -1206,9 +1222,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > > bdev->bd_queue = disk->queue; > > bdev->bd_contains = bdev; > > if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access) > > - bdev->bd_inode->i_flags = S_DAX; > > - else > > - bdev->bd_inode->i_flags = 0; > > + bd_add_dax(inode); > > > > if (!partno) { > > ret = -ENXIO; > > @@ -1228,6 +1242,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > > bdev->bd_part = NULL; > > bdev->bd_disk = NULL; > > bdev->bd_queue = NULL; > > + bd_clear_dax(inode); > > mutex_unlock(&bdev->bd_mutex); > > disk_unblock_events(disk); > > put_disk(disk); > > @@ -1239,7 +1254,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > > if (!ret) { > > bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); > > if (!blkdev_dax_capable(bdev)) > > - bdev->bd_inode->i_flags &= ~S_DAX; > > + bd_clear_dax(inode); > > } > > > > /* > > @@ -1276,7 +1291,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > > } > > bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); > > if (!blkdev_dax_capable(bdev)) > > - bdev->bd_inode->i_flags &= ~S_DAX; > > + bd_clear_dax(inode); > > } > > } else { > > if (bdev->bd_contains == bdev) { > > @@ -1297,6 +1312,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > > put_disk(disk); > > module_put(owner); > > } > > + inode_lock(inode); > > + if (inode->i_flags & S_DAX) > > + inode->i_flags = S_DAX; > > + inode_unlock(inode); > > + > > Clear all other flags if S_DAX is set? Why? Code today sets i_flags = S_DAX, clearing all other flags, so I figured that's how it's supposed to be. Though I can't see any reason it has to be that way. > > > bdev->bd_openers++; > > if (for_part) > > bdev->bd_part_count++; > > @@ -1309,6 +1329,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > > bdev->bd_disk = NULL; > > bdev->bd_part = NULL; > > bdev->bd_queue = NULL; > > + bd_clear_dax(inode); > > Why? As far as I can tell, this path is only for cleanup when a whole block device or partition block device fails to get. Your question makes me think I don't understand this part of the code correctly, and admittedly I don't understand it all that well. All callers of out_clear set an error code on ret, so I assumed all instances of out_clear are errors. There's no reason to keep the S_DAX flag on the bdev this time around. If it resolves itself (for example, GENHD_FL_UP gets set), it can be set again on the next __blkdev_get pass. -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, May 17, 2016 at 12:34:57PM -0700, Dan Williams wrote: > On Tue, May 17, 2016 at 11:29 AM, Jon Derrick > <jonathan.derrick@intel.com> wrote: > > This patch fixes S_DAX bd_inode i_flag locking to conform to suggested > > A "fix" implies that its currently broken. I don't see how it is, not > until we add an ioctl method or other path that also tries to update > the flags outside of blkdev_get() context. So, I don't think this > patch stands on its own if you were intending it to be merged > separately. > > > locking rules. It is presumed that S_DAX is the only valid inode flag > > for a block device which subscribes to direct-access, and will restore > > any previously set flags if direct-access initialization fails. > > > > This reverts to i_flags behavior prior to > > bbab37ddc20bae4709bca8745c128c4f46fe63c5 > > by allowing other bd_inode flags when DAX is disabled > > > > Signed-off-by: Jon Derrick <jonathan.derrick@intel.com> > > --- > > fs/block_dev.c | 31 ++++++++++++++++++++++++++----- > > 1 file changed, 26 insertions(+), 5 deletions(-) > > > > diff --git a/fs/block_dev.c b/fs/block_dev.c > > index 20a2c02..d41e37f 100644 > > --- a/fs/block_dev.c > > +++ b/fs/block_dev.c > > @@ -1159,6 +1159,20 @@ void bd_set_size(struct block_device *bdev, loff_t size) > > } > > EXPORT_SYMBOL(bd_set_size); > > > > +static void bd_add_dax(struct inode *inode) > > +{ > > + inode_lock(inode); > > + inode->i_flags |= S_DAX; > > + inode_unlock(inode); > > +} > > + > > +static void bd_clear_dax(struct inode *inode) > > +{ > > + inode_lock(inode); > > + inode->i_flags &= ~S_DAX; > > + inode_unlock(inode); > > +} > > Since this is inode generic should these helpers be prefixed "i_" > rather than "bd_"? Probably not, because in general filesystems are responsible for updating i_flags to reflect on-disk inode configuration and that's typically done under transaction contexts. e.g. through ioctl interfaces to set/clear flags that are stored on disk. As such, inode->i_flags is effectively protected by the filesystem specific locking heirarchy, not the generic inode_lock(). e.g. have a look at XFS storing a persistent "DAX-enabled" flag in the inode, which can be set/cleared on individual inodes dynamically by FS_IOC_FSSETXATTR. The XFS i_flags update function assumes exclusive access to the field as it is called under locked transaction context. Similar code exists in ext4, btrfs, gfs2, etc.... Cheers, Dave.
On Wed 18-05-16 08:58:42, Dave Chinner wrote: > On Tue, May 17, 2016 at 12:34:57PM -0700, Dan Williams wrote: > > On Tue, May 17, 2016 at 11:29 AM, Jon Derrick > > <jonathan.derrick@intel.com> wrote: > > > This patch fixes S_DAX bd_inode i_flag locking to conform to suggested > > > > A "fix" implies that its currently broken. I don't see how it is, not > > until we add an ioctl method or other path that also tries to update > > the flags outside of blkdev_get() context. So, I don't think this > > patch stands on its own if you were intending it to be merged > > separately. > > > > > locking rules. It is presumed that S_DAX is the only valid inode flag > > > for a block device which subscribes to direct-access, and will restore > > > any previously set flags if direct-access initialization fails. > > > > > > This reverts to i_flags behavior prior to > > > bbab37ddc20bae4709bca8745c128c4f46fe63c5 > > > by allowing other bd_inode flags when DAX is disabled > > > > > > Signed-off-by: Jon Derrick <jonathan.derrick@intel.com> > > > --- > > > fs/block_dev.c | 31 ++++++++++++++++++++++++++----- > > > 1 file changed, 26 insertions(+), 5 deletions(-) > > > > > > diff --git a/fs/block_dev.c b/fs/block_dev.c > > > index 20a2c02..d41e37f 100644 > > > --- a/fs/block_dev.c > > > +++ b/fs/block_dev.c > > > @@ -1159,6 +1159,20 @@ void bd_set_size(struct block_device *bdev, loff_t size) > > > } > > > EXPORT_SYMBOL(bd_set_size); > > > > > > +static void bd_add_dax(struct inode *inode) > > > +{ > > > + inode_lock(inode); > > > + inode->i_flags |= S_DAX; > > > + inode_unlock(inode); > > > +} > > > + > > > +static void bd_clear_dax(struct inode *inode) > > > +{ > > > + inode_lock(inode); > > > + inode->i_flags &= ~S_DAX; > > > + inode_unlock(inode); > > > +} > > > > Since this is inode generic should these helpers be prefixed "i_" > > rather than "bd_"? > > Probably not, because in general filesystems are responsible for > updating i_flags to reflect on-disk inode configuration and that's > typically done under transaction contexts. e.g. through ioctl > interfaces to set/clear flags that are stored on disk. As such, > inode->i_flags is effectively protected by the filesystem specific > locking heirarchy, not the generic inode_lock(). > > e.g. have a look at XFS storing a persistent "DAX-enabled" flag in > the inode, which can be set/cleared on individual inodes dynamically > by FS_IOC_FSSETXATTR. The XFS i_flags update function assumes > exclusive access to the field as it is called under locked > transaction context. Similar code exists in ext4, btrfs, gfs2, > etc.... So in case of ext4, we actually do use inode_lock() to protect against racing IOC_SETFLAGS calls. i_flags is a strange mix and there are a few (like S_DEAD or S_NOSEC) which are not persistent and those get set / cleared by VFS. Some of those places (e.g. the clearing in __vfs_setxattr_noperm()) are not really controlled by the filesystem AFAICT. So when XFS doesn't use inode_lock() to protect i_flags updates it can race with VFS on i_flags updates. Honza
On Tue 17-05-16 14:38:05, Jon Derrick wrote: > On Tue, May 17, 2016 at 12:34:57PM -0700, Dan Williams wrote: > > On Tue, May 17, 2016 at 11:29 AM, Jon Derrick > > <jonathan.derrick@intel.com> wrote: > > > This patch fixes S_DAX bd_inode i_flag locking to conform to suggested > > > > A "fix" implies that its currently broken. I don't see how it is, not > > until we add an ioctl method or other path that also tries to update > > the flags outside of blkdev_get() context. So, I don't think this > > patch stands on its own if you were intending it to be merged > > separately. > Are there no other paths that can set/clear a bd_inode->i_flags? > If not, that's fine and I'll pack this into the next HIPRI set So for block device inodes I don't think anybody else is currently messing with i_flags but using some lock (and inode_lock() looks fine to me) for that seems like a reasonable future-proofing. But I think it is enough to send these patches together with your patch set if it introduces another user of i_flags in block device inodes. It is always better to see a concrete reason for the change. > > > + inode_lock(inode); > > > + if (inode->i_flags & S_DAX) > > > + inode->i_flags = S_DAX; > > > + inode_unlock(inode); > > > + > > > > Clear all other flags if S_DAX is set? Why? > > Code today sets i_flags = S_DAX, clearing all other flags, so I figured > that's how it's supposed to be. Though I can't see any reason it has to > be that way. IMO that's just over-cautious code and there's no good reason for that. Inodes are initialized with i_flags == 0. Some i_flags may be set by previous openers of the block device inode but then these are presumably consistent. So I'd just remove this zeroing. > > > @@ -1309,6 +1329,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) > > > bdev->bd_disk = NULL; > > > bdev->bd_part = NULL; > > > bdev->bd_queue = NULL; > > > + bd_clear_dax(inode); > > > > Why? > As far as I can tell, this path is only for cleanup when a whole block > device or partition block device fails to get. Your question makes me > think I don't understand this part of the code correctly, and admittedly > I don't understand it all that well. > > All callers of out_clear set an error code on ret, so I assumed all > instances of out_clear are errors. There's no reason to keep the S_DAX > flag on the bdev this time around. If it resolves itself (for example, > GENHD_FL_UP gets set), it can be set again on the next __blkdev_get pass. Yeah, I agree. When we fail to properly set up the block device inode, it probably makes sense to clear S_DAX just to be sure... Honza
On Wed, May 18, 2016 at 10:20:39AM +0200, Jan Kara wrote: > On Wed 18-05-16 08:58:42, Dave Chinner wrote: > > On Tue, May 17, 2016 at 12:34:57PM -0700, Dan Williams wrote: > > > On Tue, May 17, 2016 at 11:29 AM, Jon Derrick > > > <jonathan.derrick@intel.com> wrote: > > > > This patch fixes S_DAX bd_inode i_flag locking to conform to suggested > > > > > > A "fix" implies that its currently broken. I don't see how it is, not > > > until we add an ioctl method or other path that also tries to update > > > the flags outside of blkdev_get() context. So, I don't think this > > > patch stands on its own if you were intending it to be merged > > > separately. > > > > > > > locking rules. It is presumed that S_DAX is the only valid inode flag > > > > for a block device which subscribes to direct-access, and will restore > > > > any previously set flags if direct-access initialization fails. > > > > > > > > This reverts to i_flags behavior prior to > > > > bbab37ddc20bae4709bca8745c128c4f46fe63c5 > > > > by allowing other bd_inode flags when DAX is disabled > > > > > > > > Signed-off-by: Jon Derrick <jonathan.derrick@intel.com> > > > > --- > > > > fs/block_dev.c | 31 ++++++++++++++++++++++++++----- > > > > 1 file changed, 26 insertions(+), 5 deletions(-) > > > > > > > > diff --git a/fs/block_dev.c b/fs/block_dev.c > > > > index 20a2c02..d41e37f 100644 > > > > --- a/fs/block_dev.c > > > > +++ b/fs/block_dev.c > > > > @@ -1159,6 +1159,20 @@ void bd_set_size(struct block_device *bdev, loff_t size) > > > > } > > > > EXPORT_SYMBOL(bd_set_size); > > > > > > > > +static void bd_add_dax(struct inode *inode) > > > > +{ > > > > + inode_lock(inode); > > > > + inode->i_flags |= S_DAX; > > > > + inode_unlock(inode); > > > > +} > > > > + > > > > +static void bd_clear_dax(struct inode *inode) > > > > +{ > > > > + inode_lock(inode); > > > > + inode->i_flags &= ~S_DAX; > > > > + inode_unlock(inode); > > > > +} > > > > > > Since this is inode generic should these helpers be prefixed "i_" > > > rather than "bd_"? > > > > Probably not, because in general filesystems are responsible for > > updating i_flags to reflect on-disk inode configuration and that's > > typically done under transaction contexts. e.g. through ioctl > > interfaces to set/clear flags that are stored on disk. As such, > > inode->i_flags is effectively protected by the filesystem specific > > locking heirarchy, not the generic inode_lock(). > > > > e.g. have a look at XFS storing a persistent "DAX-enabled" flag in > > the inode, which can be set/cleared on individual inodes dynamically > > by FS_IOC_FSSETXATTR. The XFS i_flags update function assumes > > exclusive access to the field as it is called under locked > > transaction context. Similar code exists in ext4, btrfs, gfs2, > > etc.... > > So in case of ext4, we actually do use inode_lock() to protect against > racing IOC_SETFLAGS calls. i_flags is a strange mix and there are a few > (like S_DEAD or S_NOSEC) which are not persistent and those get set / > cleared by VFS. Some of those places (e.g. the clearing in > __vfs_setxattr_noperm()) are not really controlled by the filesystem > AFAICT. So when XFS doesn't use inode_lock() to protect i_flags updates it > can race with VFS on i_flags updates. There are several other filesystems with the same problem. It's not actually clear what they rules for i_flags are. The comment above inode_set_flags() is ambiguous at best, and even points this out: * In the long run, i_mutex is overkill, and we should probably look * at using the i_lock spinlock to protect i_flags, and then make sure * it is so documented in include/linux/fs.h and that all code follows * the locking convention!! Perhaps that should be done before the situation gets worse... Cheers Dave.
diff --git a/fs/block_dev.c b/fs/block_dev.c index 20a2c02..d41e37f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1159,6 +1159,20 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); +static void bd_add_dax(struct inode *inode) +{ + inode_lock(inode); + inode->i_flags |= S_DAX; + inode_unlock(inode); +} + +static void bd_clear_dax(struct inode *inode) +{ + inode_lock(inode); + inode->i_flags &= ~S_DAX; + inode_unlock(inode); +} + static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); /* @@ -1172,6 +1186,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; struct module *owner; + struct inode *inode; int ret; int partno; int perm = 0; @@ -1198,6 +1213,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (!disk) goto out; owner = disk->fops->owner; + inode = bdev->bd_inode; disk_block_events(disk); mutex_lock_nested(&bdev->bd_mutex, for_part); @@ -1206,9 +1222,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access) - bdev->bd_inode->i_flags = S_DAX; - else - bdev->bd_inode->i_flags = 0; + bd_add_dax(inode); if (!partno) { ret = -ENXIO; @@ -1228,6 +1242,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_part = NULL; bdev->bd_disk = NULL; bdev->bd_queue = NULL; + bd_clear_dax(inode); mutex_unlock(&bdev->bd_mutex); disk_unblock_events(disk); put_disk(disk); @@ -1239,7 +1254,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); if (!blkdev_dax_capable(bdev)) - bdev->bd_inode->i_flags &= ~S_DAX; + bd_clear_dax(inode); } /* @@ -1276,7 +1291,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); if (!blkdev_dax_capable(bdev)) - bdev->bd_inode->i_flags &= ~S_DAX; + bd_clear_dax(inode); } } else { if (bdev->bd_contains == bdev) { @@ -1297,6 +1312,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) put_disk(disk); module_put(owner); } + inode_lock(inode); + if (inode->i_flags & S_DAX) + inode->i_flags = S_DAX; + inode_unlock(inode); + bdev->bd_openers++; if (for_part) bdev->bd_part_count++; @@ -1309,6 +1329,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_queue = NULL; + bd_clear_dax(inode); if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL;
This patch fixes S_DAX bd_inode i_flag locking to conform to suggested locking rules. It is presumed that S_DAX is the only valid inode flag for a block device which subscribes to direct-access, and will restore any previously set flags if direct-access initialization fails. This reverts to i_flags behavior prior to bbab37ddc20bae4709bca8745c128c4f46fe63c5 by allowing other bd_inode flags when DAX is disabled Signed-off-by: Jon Derrick <jonathan.derrick@intel.com> --- fs/block_dev.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-)