From patchwork Mon Apr 29 04:53:50 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 10921199 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id BE9311398 for ; Mon, 29 Apr 2019 04:55:08 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id B1BE62625B for ; Mon, 29 Apr 2019 04:55:08 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id A5F48274A3; Mon, 29 Apr 2019 04:55:08 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=unavailable version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 52DFE2625B for ; Mon, 29 Apr 2019 04:55:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726819AbfD2EyG (ORCPT ); Mon, 29 Apr 2019 00:54:06 -0400 Received: from mga03.intel.com ([134.134.136.65]:28440 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725468AbfD2EyF (ORCPT ); Mon, 29 Apr 2019 00:54:05 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 28 Apr 2019 21:54:04 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,408,1549958400"; d="scan'208";a="146566271" Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga003.jf.intel.com with ESMTP; 28 Apr 2019 21:54:04 -0700 From: ira.weiny@intel.com To: lsf-pc@lists.linux-foundation.org Cc: linux-fsdevel@vger.kernel.org, 
linux-kernel@vger.kernel.org, linux-mm@kvack.org, Dan Williams , Jan Kara , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , John Hubbard , Michal Hocko , Ira Weiny Subject: [RFC PATCH 01/10] fs/locks: Add trace_leases_conflict Date: Sun, 28 Apr 2019 21:53:50 -0700 Message-Id: <20190429045359.8923-2-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190429045359.8923-1-ira.weiny@intel.com> References: <20190429045359.8923-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Ira Weiny Signed-off-by: Ira Weiny --- fs/locks.c | 20 ++++++++++++++----- include/trace/events/filelock.h | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index eaa1cfaf73b0..4b66ed91fb53 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1528,11 +1528,21 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose) static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) { - if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) - return false; - if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) - return false; - return locks_conflict(breaker, lease); + bool rc; + + if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) { + rc = false; + goto trace; + } + if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) { + rc = false; + goto trace; + } + + rc = locks_conflict(breaker, lease); +trace: + trace_leases_conflict(rc, lease, breaker); + return rc; } static bool diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index fad7befa612d..4b735923f2ff 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -203,6 +203,41 @@ TRACE_EVENT(generic_add_lease, show_fl_type(__entry->fl_type)) ); +TRACE_EVENT(leases_conflict, + 
TP_PROTO(bool conflict, struct file_lock *lease, struct file_lock *breaker), + + TP_ARGS(conflict, lease, breaker), + + TP_STRUCT__entry( + __field(void *, lease) + __field(void *, breaker) + __field(unsigned int, l_fl_flags) + __field(unsigned int, b_fl_flags) + __field(unsigned char, l_fl_type) + __field(unsigned char, b_fl_type) + __field(bool, conflict) + ), + + TP_fast_assign( + __entry->lease = lease; + __entry->l_fl_flags = lease->fl_flags; + __entry->l_fl_type = lease->fl_type; + __entry->breaker = breaker; + __entry->b_fl_flags = breaker->fl_flags; + __entry->b_fl_type = breaker->fl_type; + __entry->conflict = conflict; + ), + + TP_printk("conflict %d: lease=0x%p fl_flags=%s fl_type=%s; breaker=0x%p fl_flags=%s fl_type=%s", + __entry->conflict, + __entry->lease, + show_fl_flags(__entry->l_fl_flags), + show_fl_type(__entry->l_fl_type), + __entry->breaker, + show_fl_flags(__entry->b_fl_flags), + show_fl_type(__entry->b_fl_type)) +); + #endif /* _TRACE_FILELOCK_H */ /* This part must be outside protection */ From patchwork Mon Apr 29 04:53:51 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 10921193 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id BC0551515 for ; Mon, 29 Apr 2019 04:54:48 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id AF0CB28779 for ; Mon, 29 Apr 2019 04:54:48 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id A2E5628783; Mon, 29 Apr 2019 04:54:48 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=unavailable version=3.3.1 Received: from 
vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id EC06128779 for ; Mon, 29 Apr 2019 04:54:47 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1725468AbfD2EyJ (ORCPT ); Mon, 29 Apr 2019 00:54:09 -0400 Received: from mga03.intel.com ([134.134.136.65]:28440 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726937AbfD2EyI (ORCPT ); Mon, 29 Apr 2019 00:54:08 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 28 Apr 2019 21:54:06 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,408,1549958400"; d="scan'208";a="146566280" Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga003.jf.intel.com with ESMTP; 28 Apr 2019 21:54:06 -0700 From: ira.weiny@intel.com To: lsf-pc@lists.linux-foundation.org Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, Dan Williams , Jan Kara , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , John Hubbard , Michal Hocko , Ira Weiny Subject: [RFC PATCH 02/10] fs/locks: Introduce FL_LONGTERM file lease Date: Sun, 28 Apr 2019 21:53:51 -0700 Message-Id: <20190429045359.8923-3-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190429045359.8923-1-ira.weiny@intel.com> References: <20190429045359.8923-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Ira Weiny GUP longterm pins of non-pagecache file system pages (FS DAX) are currently disallowed because they are unsafe. 
The danger for pinning these pages comes from the fact that hole punch and/or truncate of those files results in the pages being mapped and pinned by a user space process while DAX has potentially allocated those pages to other processes. Attempts to hold those pages in reserve defeat the purpose of allowing for FS truncate/hole punch should the user truly desire those operations. That said, most users who are mapping FS DAX pages for long term pin purposes (such as RDMA) are not going to want to deallocate these pages while those pages are in use. To do so would mean the application would lose data. So the use case for allowing these operations on such pages seems limited. However, the kernel must protect itself and users from potential mistakes and/or malicious user space code. Rather than disabling long term pins as is done now, allow users who know they are going to be pinning this memory to alert the file system of this intention. Furthermore, allow them to be alerted if the pages they have pinned are going away such that they can react. Example user space pseudocode for a user using RDMA and reacting to a lease break of this type would look like this: lease_break() { ... if (sigio.fd == rdma_fd) { ibv_dereg_mr(mr); close(rdma_fd); } } foo() { rdma_fd = open() fcntl(rdma_fd, F_SETLEASE, F_LONGTERM); sigaction(SIGIO, ... lease_break ...); ptr = mmap(rdma_fd, ...); mr = ibv_reg_mr(ptr, ...); } Follow-on patches present 2 possible solutions to what to do should an application not take this lease. 1) failure to take the lease results in a failure of the ibv_reg_mr() (or other pin system call which results in GUP being called.) 2) failure to take the lease results in GUP taking the lease on behalf of the user. In both of these cases a failure to react and unpin the memory of the file in question will result in a SIGBUS being sent to the application holding the lease. 
This is slightly different behavior from what would happen if an application were to write to a hole punched area of a file but it still seems reasonable given that this operation is not allowed at all currently. This patch, 1 of X, exports the F_LONGTERM lease type to user space and implements taking this lease on a file. Follow-on patches implement failing a longterm GUP as well as sending a SIGBUS. The last patch in the series removes the restriction of failing FOLL_LONGTERM for DAX operations. A follow-on series (not yet completed) will remove the FOLL_LONGTERM restrictions within GUP for calls such as get_user_pages_locked because vma access is no longer required. RFC NOTEs / questions: Should F_LONGTERM be a "flag" of some sort OR'ed in with F_RDLCK? It was considered to use F_WRLCK vs F_RDLCK to indicate if the user was going to be writing vs reading from the file in question. However, in the end this does not matter as far as the FS is concerned. While internally we treat this as an F_RDLCK type, the user should consider this an F_LONGTERM lease type which has no concept of read or write. FL_LAYOUT was not used because FL_LAYOUT lease break in XFS would have created a "chicken and egg" problem. FL_LONGTERM must be broken and the ref counts of devmap pages dropped to 1 before FL_LAYOUT could be broken. Not using FL_LAYOUT also makes it very clear we don't have issues conflicting with NFS code, although I don't think that there would have been any conflict other than the XFS lease break order. The name "FL_LONGTERM" is probably not the best name for this feature. Alternative names are welcome. 
--- fs/locks.c | 38 +++++++++++++++++++++++++++----- include/linux/fs.h | 1 + include/uapi/asm-generic/fcntl.h | 2 ++ 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 4b66ed91fb53..8ea1c5713e6a 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -610,7 +610,8 @@ static const struct lock_manager_operations lease_manager_ops = { /* * Initialize a lease, use the default lock manager operations */ -static int lease_init(struct file *filp, long type, struct file_lock *fl) +static int lease_init(struct file *filp, long type, unsigned int flags, + struct file_lock *fl) { if (assign_type(fl, type) != 0) return -EINVAL; @@ -620,6 +621,8 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl) fl->fl_file = filp; fl->fl_flags = FL_LEASE; + if (flags & FL_LONGTERM) + fl->fl_flags |= FL_LONGTERM; fl->fl_start = 0; fl->fl_end = OFFSET_MAX; fl->fl_ops = NULL; @@ -628,7 +631,8 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl) } /* Allocate a file_lock initialised to this type of lease */ -static struct file_lock *lease_alloc(struct file *filp, long type) +static struct file_lock *lease_alloc(struct file *filp, long type, + unsigned int flags) { struct file_lock *fl = locks_alloc_lock(); int error = -ENOMEM; @@ -636,7 +640,7 @@ static struct file_lock *lease_alloc(struct file *filp, long type) if (fl == NULL) return ERR_PTR(error); - error = lease_init(filp, type, fl); + error = lease_init(filp, type, flags, fl); if (error) { locks_free_lock(fl); return ERR_PTR(error); @@ -1530,6 +1534,10 @@ static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) { bool rc; + if ((breaker->fl_flags & FL_LONGTERM) != (lease->fl_flags & FL_LONGTERM)) { + rc = false; + goto trace; + } if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) { rc = false; goto trace; @@ -1582,7 +1590,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) int want_write = 
(mode & O_ACCMODE) != O_RDONLY; LIST_HEAD(dispose); - new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); + new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK, 0); if (IS_ERR(new_fl)) return PTR_ERR(new_fl); new_fl->fl_flags = type; @@ -1773,7 +1781,7 @@ check_conflicting_open(const struct dentry *dentry, const long arg, int flags) int ret = 0; struct inode *inode = dentry->d_inode; - if (flags & FL_LAYOUT) + if (flags & FL_LAYOUT || flags & FL_LONGTERM) return 0; if ((arg == F_RDLCK) && inode_is_open_for_write(inode)) @@ -2009,8 +2017,26 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) struct file_lock *fl; struct fasync_struct *new; int error; + unsigned int flags = 0; + + /* + * NOTE on F_LONGTERM lease + * + * LONGTERM lease types are taken on files which the user knows that + * they will be pinning in memory for some indeterminate amount of + * time. Such as for use with RDMA. While we don't know what user + * space is going to do with the file we still use a F_RDLOCK level of + * lease. This ensures that there are no conflicts between + * 2 users. The conflict should only come from the File system wanting + * to revoke the lease in break_layout() And this is done by using + * F_WRLCK in the break code. 
+ */ + if (arg == F_LONGTERM) { + arg = F_RDLCK; + flags = FL_LONGTERM; + } - fl = lease_alloc(filp, arg); + fl = lease_alloc(filp, arg, flags); if (IS_ERR(fl)) return PTR_ERR(fl); diff --git a/include/linux/fs.h b/include/linux/fs.h index 8b42df09b04c..ace21c6feb19 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -991,6 +991,7 @@ static inline struct file *get_file(struct file *f) #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ #define FL_LAYOUT 2048 /* outstanding pNFS layout */ +#define FL_LONGTERM 4096 /* user held pin */ #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index 9dc0bf0c5a6e..9938ebc24adf 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -174,6 +174,8 @@ struct f_owner_ex { #define F_SHLCK 8 /* or 4 */ #endif +#define F_LONGTERM 16 /* lease to allow longterm GUP */ + /* operations for bsd flock(), also used by the kernel implementation */ #define LOCK_SH 1 /* shared lock */ #define LOCK_EX 2 /* exclusive lock */ From patchwork Mon Apr 29 04:53:52 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 10921163 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 62C271398 for ; Mon, 29 Apr 2019 04:54:11 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 552C22877B for ; Mon, 29 Apr 2019 04:54:11 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 4903F28786; Mon, 29 Apr 2019 04:54:11 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 
required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 31CD32877B for ; Mon, 29 Apr 2019 04:54:10 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727121AbfD2EyI (ORCPT ); Mon, 29 Apr 2019 00:54:08 -0400 Received: from mga03.intel.com ([134.134.136.65]:28441 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727031AbfD2EyH (ORCPT ); Mon, 29 Apr 2019 00:54:07 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 28 Apr 2019 21:54:07 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,408,1549958400"; d="scan'208";a="146566285" Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga003.jf.intel.com with ESMTP; 28 Apr 2019 21:54:06 -0700 From: ira.weiny@intel.com To: lsf-pc@lists.linux-foundation.org Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, Dan Williams , Jan Kara , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , John Hubbard , Michal Hocko , Ira Weiny Subject: [RFC PATCH 03/10] mm/gup: Pass flags down to __gup_device_huge* calls Date: Sun, 28 Apr 2019 21:53:52 -0700 Message-Id: <20190429045359.8923-4-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190429045359.8923-1-ira.weiny@intel.com> References: <20190429045359.8923-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Ira Weiny In order to support taking and/or checking for a LONGTERM lease on a FS DAX inode these calls need to know if FOLL_LONGTERM was specified. This patch passes the flags down but does not use them. 
It does this in prep for 2 future patches. --- mm/gup.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 42680823fbbe..a8ac75bc1452 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1853,7 +1853,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, #if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) static int __gup_device_huge(unsigned long pfn, unsigned long addr, - unsigned long end, struct page **pages, int *nr) + unsigned long end, struct page **pages, int *nr, + unsigned int flags) { int nr_start = *nr; struct dev_pagemap *pgmap = NULL; @@ -1886,30 +1887,33 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, } static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, - unsigned long end, struct page **pages, int *nr) + unsigned long end, struct page **pages, int *nr, + unsigned int flags) { unsigned long fault_pfn; int nr_start = *nr; fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - if (!__gup_device_huge(fault_pfn, addr, end, pages, nr)) + if (!__gup_device_huge(fault_pfn, addr, end, pages, nr, flags)) return 0; if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { undo_dev_pagemap(nr, nr_start, pages); return 0; } + return 1; } static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, - unsigned long end, struct page **pages, int *nr) + unsigned long end, struct page **pages, int *nr, + unsigned int flags) { unsigned long fault_pfn; int nr_start = *nr; fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - if (!__gup_device_huge(fault_pfn, addr, end, pages, nr)) + if (!__gup_device_huge(fault_pfn, addr, end, pages, nr, flags)) return 0; if (unlikely(pud_val(orig) != pud_val(*pudp))) { @@ -1920,14 +1924,16 @@ static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, } #else static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long 
addr, - unsigned long end, struct page **pages, int *nr) + unsigned long end, struct page **pages, int *nr, + unsigned int flags) { BUILD_BUG(); return 0; } static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr, - unsigned long end, struct page **pages, int *nr) + unsigned long end, struct page **pages, int *nr, + unsigned int flags) { BUILD_BUG(); return 0; @@ -1946,7 +1952,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, if (pmd_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; - return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr); + return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr, + flags); } refs = 0; @@ -1988,7 +1995,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, if (pud_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; - return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr); + return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr, + flags); } refs = 0; From patchwork Mon Apr 29 04:53:53 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 10921165 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id CAD471575 for ; Mon, 29 Apr 2019 04:54:11 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id BBC2B28779 for ; Mon, 29 Apr 2019 04:54:11 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id AFA4528783; Mon, 29 Apr 2019 04:54:11 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org 
(vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 2164C28779 for ; Mon, 29 Apr 2019 04:54:11 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727215AbfD2EyJ (ORCPT ); Mon, 29 Apr 2019 00:54:09 -0400 Received: from mga03.intel.com ([134.134.136.65]:28441 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727083AbfD2EyI (ORCPT ); Mon, 29 Apr 2019 00:54:08 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 28 Apr 2019 21:54:07 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,408,1549958400"; d="scan'208";a="146566289" Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga003.jf.intel.com with ESMTP; 28 Apr 2019 21:54:07 -0700 From: ira.weiny@intel.com To: lsf-pc@lists.linux-foundation.org Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, Dan Williams , Jan Kara , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , John Hubbard , Michal Hocko , Ira Weiny Subject: [RFC PATCH 04/10] WIP: mm/gup: Ensure F_LONGTERM lease is held on GUP pages Date: Sun, 28 Apr 2019 21:53:53 -0700 Message-Id: <20190429045359.8923-5-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190429045359.8923-1-ira.weiny@intel.com> References: <20190429045359.8923-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Ira Weiny Honestly I think I should remove this patch. It is removed later in the series and ensuring the lease is there at GUP time does not guarantee the lease is held. The user could remove the lease??? 
Regardless the code in GUP to take the lease holds it even if the user does try to remove it and will take the lease back if they race and the lease is remove prior to the GUP getting a reference to it... So pretty much anyway you slice it this patch is not needed... FOLL_LONGTERM pins are currently disabled for GUP calls which map to FS DAX files. As an alternative allow these files to be mapped if the user has taken a F_LONGTERM lease on the file. The intention is that the user is aware of the dangers of file truncated/hole punch and accepts file which has been mapped this way (such as is done with RDMA) and they have taken this lease to indicate they will accept the behavior if the filesystem needs to take action. Example user space pseudocode for a user using RDMA and reacting to a lease break of this type would look like this: lease_break() { ... if (sigio.fd == rdma_fd) { ibv_dereg_mr(mr); close(rdma_fd); } } foo() { rdma_fd = open() fcntl(rdma_fd, F_SETLEASE, F_LONGTERM); sigaction(SIGIO, ... lease_break ...); ptr = mmap(rdma_fd, ...); mr = ibv_reg_mr(ptr, ...); } Failure to process the SIGIO as above will result in a SIGBUS being given to the process. SIGBUS is implemented in later patches. This patch X of Y fails the FOLL_LONGTERM pin if the FL_LONGTERM lease is not held. --- fs/locks.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 2 ++ mm/gup.c | 13 +++++++++++++ mm/huge_memory.c | 20 ++++++++++++++++++++ 4 files changed, 82 insertions(+) diff --git a/fs/locks.c b/fs/locks.c index 8ea1c5713e6a..31c8b761a578 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2939,3 +2939,50 @@ static int __init filelock_init(void) return 0; } core_initcall(filelock_init); + +// FIXME what about GUP calls to Device DAX??? +// I believe they will still return true for *_devmap +// +// return true if the page has a LONGTERM lease associated with it's file. 
+bool mapping_inode_has_longterm(struct page *page) +{ + bool ret; + struct inode *inode; + struct file_lock *fl; + struct file_lock_context *ctx; + + /* + * should never be here unless we are a "page cache" page without a + * page cache. + */ + if (WARN_ON(PageAnon(page))) + return false; + if (WARN_ON(!page)) + return false; + if (WARN_ON(!page->mapping)) + return false; + if (WARN_ON(!page->mapping->host)) + return false; + + /* Ensure page->mapping isn't freed while we look at it */ + /* FIXME mm lock is held here I think? so is this really needed? */ + rcu_read_lock(); + inode = page->mapping->host; + + ctx = locks_get_lock_context(inode, F_RDLCK); + + ret = false; + spin_lock(&ctx->flc_lock); + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + if (fl->fl_flags & FL_LONGTERM) { + ret = true; + break; + } + } + spin_unlock(&ctx->flc_lock); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(mapping_inode_has_longterm); + diff --git a/include/linux/mm.h b/include/linux/mm.h index 77e34ec5dfbe..cde359e71b7b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1572,6 +1572,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); +bool mapping_inode_has_longterm(struct page *page); + /* Container for pinned pfns / pages */ struct frame_vector { unsigned int nr_allocated; /* Number of frames we have space for */ diff --git a/mm/gup.c b/mm/gup.c index a8ac75bc1452..5ae1dd31a58d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -292,6 +292,12 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, page = pte_page(pte); else goto no_page; + + if (unlikely(flags & FOLL_LONGTERM) && + !mapping_inode_has_longterm(page)) { + page = ERR_PTR(-EINVAL); + goto out; + } } else if (unlikely(!page)) { if (flags & FOLL_DUMP) { /* Avoid special (like zero) pages in core dumps */ @@ -1869,6 +1875,13 @@ static int 
__gup_device_huge(unsigned long pfn, unsigned long addr, } SetPageReferenced(page); pages[*nr] = page; + + if (unlikely(flags & FOLL_LONGTERM) && + !mapping_inode_has_longterm(page)) { + undo_dev_pagemap(nr, nr_start, pages); + return 0; + } + if (get_gup_pin_page(page)) { undo_dev_pagemap(nr, nr_start, pages); return 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 404acdcd0455..8819624c740f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -910,6 +910,16 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, if (!*pgmap) return ERR_PTR(-EFAULT); page = pfn_to_page(pfn); + + // Check for Layout lease. + // FIXME combine logic + if (unlikely(flags & FOLL_LONGTERM)) { + WARN_ON_ONCE(PageAnon(page)); + if (!mapping_inode_has_longterm(page)) { + return NULL; + } + } + get_page(page); return page; @@ -1050,6 +1060,16 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, if (!*pgmap) return ERR_PTR(-EFAULT); page = pfn_to_page(pfn); + + // Check for LONGTERM lease. 
+ // FIXME combine logic remove Warn + if (unlikely(flags & FOLL_LONGTERM)) { + WARN_ON_ONCE(PageAnon(page)); + if (!mapping_inode_has_longterm(page)) { + return NULL; + } + } + get_page(page); return page; From patchwork Mon Apr 29 04:53:54 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 10921195 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A443C1515 for ; Mon, 29 Apr 2019 04:55:03 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 92B0D2625B for ; Mon, 29 Apr 2019 04:55:03 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 7EE40274A3; Mon, 29 Apr 2019 04:55:03 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=unavailable version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 5AA7E2625B for ; Mon, 29 Apr 2019 04:55:02 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727420AbfD2Eyx (ORCPT ); Mon, 29 Apr 2019 00:54:53 -0400 Received: from mga03.intel.com ([134.134.136.65]:28440 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727112AbfD2EyI (ORCPT ); Mon, 29 Apr 2019 00:54:08 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 28 Apr 2019 21:54:08 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,408,1549958400"; d="scan'208";a="146566293" Received: from 
iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga003.jf.intel.com with ESMTP; 28 Apr 2019 21:54:07 -0700 From: ira.weiny@intel.com To: lsf-pc@lists.linux-foundation.org Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, Dan Williams , Jan Kara , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , John Hubbard , Michal Hocko , Ira Weiny Subject: [RFC PATCH 05/10] mm/gup: Take FL_LONGTERM lease if not set by user Date: Sun, 28 Apr 2019 21:53:54 -0700 Message-Id: <20190429045359.8923-6-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190429045359.8923-1-ira.weiny@intel.com> References: <20190429045359.8923-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Ira Weiny If a user has failed to take a F_LONGTERM lease on a file and they do a longterm pin on the pages associated with a file, take a FL_LONGTERM lease for them. If the user has not taken a lease on the file they are trying to pin, create a FL_LONGTERM lease and attach it to the inode associated with the memory being pinned. If the user has already taken a lease, ref count the lease such that it will not be removed until all the GUP pins have been removed. This prevents the user from removing the GUP lease and tricking the kernel into thinking the memory is free. Follow-on patches will send a SIGBUS if the user does not remove their GUP pins and the FS needs the pages in question. This should only happen if they have not planned the use of the file correctly and are allowing other processes to truncate/hole punch a file they are actively trying to access. This is similar to what would happen if the memory was accessed through a regular CPU instruction with a couple of exceptions. 1) The SIGBUS is sent when the memory becomes invalid rather than waiting for an access by the process. 
This is because we don't know when the device may try to access the page. So we assume that the page gets "accessed immediately." 2) Hole punch is treated like a truncate. As such SIGBUS is sent rather than attempting to allocate file space as a normal CPU access would. --- fs/locks.c | 179 ++++++++++++++++++++++++++++++++++++++++++++- include/linux/fs.h | 4 + mm/gup.c | 7 +- mm/huge_memory.c | 6 +- 4 files changed, 187 insertions(+), 9 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 31c8b761a578..ae508d192223 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -343,8 +343,10 @@ struct file_lock *locks_alloc_lock(void) { struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL); - if (fl) + if (fl) { locks_init_lock_heads(fl); + kref_init(&fl->gup_ref); + } return fl; } @@ -607,6 +609,14 @@ static const struct lock_manager_operations lease_manager_ops = { .lm_setup = lease_setup, }; +static int lease_modify_longterm(struct file_lock *fl, int arg, + struct list_head *dispose); +static const struct lock_manager_operations lease_longterm_ops = { + .lm_break = lease_break_callback, + .lm_change = lease_modify_longterm, + .lm_setup = lease_setup, +}; + /* * Initialize a lease, use the default lock manager operations */ @@ -621,12 +631,15 @@ static int lease_init(struct file *filp, long type, unsigned int flags, fl->fl_file = filp; fl->fl_flags = FL_LEASE; - if (flags & FL_LONGTERM) + if (flags & FL_LONGTERM) { fl->fl_flags |= FL_LONGTERM; + fl->fl_lmops = &lease_longterm_ops; + } else { + fl->fl_lmops = &lease_manager_ops; + } fl->fl_start = 0; fl->fl_end = OFFSET_MAX; fl->fl_ops = NULL; - fl->fl_lmops = &lease_manager_ops; return 0; } @@ -1506,6 +1519,55 @@ int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose) } EXPORT_SYMBOL(lease_modify); +static void release_longterm_lease(struct kref *kref) +{ + struct file_lock *fl = container_of(kref, struct file_lock, gup_ref); + + locks_delete_lock_ctx(fl, NULL); +} + +/* + * LONGTERM leases are 
special in that they may be held by the GUP code and + * therefore can't be modified in the same way as regular file leases. + * + * Specifically the lease is refcounted by GUP based on the number of pages + * which want to hold the lease. + */ +static int lease_modify_longterm(struct file_lock *fl, int arg, + struct list_head *dispose) +{ + int error = assign_type(fl, arg); + + if (error) + return error; + lease_clear_pending(fl, arg); + locks_wake_up_blocks(fl); + + if (arg == F_UNLCK) { + struct file *filp = fl->fl_file; + + /* + * Users who take the longterm lease get a reference to it. + * This modify will remove that reference if it exists. But + * only that reference. This means that the GUP code must exit + * before the LONGTERM lease will be fully removed. + */ + if (filp) { + f_delown(filp); + filp->f_owner.signum = 0; + + fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync); + if (fl->fl_fasync != NULL) { + printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); + fl->fl_fasync = NULL; + } + + kref_put(&fl->gup_ref, release_longterm_lease); + } + } + return 0; +} + static bool past_time(unsigned long then) { if (!then) @@ -1794,6 +1856,33 @@ check_conflicting_open(const struct dentry *dentry, const long arg, int flags) return ret; } +/* + * Note the locks could eventually be optimized to lock over smaller areas + * of the file. But for now we do this per inode. + * + * The rationale is the most common use case, where we don't expect users + * to be removing any of the pages of the file while it is being used by the + * longterm pin. Should the user want to alter the file in this way they will + * be required to release the pins, alter the file, and reestablish the pins. + * + * inode->i_flctx->flc_lock must be held. 
+ */ +static struct file_lock *find_longterm_lease(struct inode *inode) +{ + struct file_lock *ret = NULL; + struct file_lock *fl; + + list_for_each_entry(fl, &inode->i_flctx->flc_lease, fl_list) { + if (fl->fl_flags & FL_LONGTERM && + fl->fl_pid == current->tgid) { + ret = fl; + break; + } + } + + return ret; +} + static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) { @@ -2986,3 +3075,87 @@ bool mapping_inode_has_longterm(struct page *page) } EXPORT_SYMBOL_GPL(mapping_inode_has_longterm); +/* + * If the user has not already taken a longterm lease on a devmap FS page, do it + * for them. + * + * Heavily borrowed from the NFS code. + */ +bool page_set_longterm_lease(struct page *page) +{ + struct file_lock_context *ctx; + struct inode *inode; + struct file_lock *new_fl, *existing_fl; + + /* + * We should never be here unless we are a "page cache" page + * And we are a devm managed page + */ + if (WARN_ON(!page) || + WARN_ON(PageAnon(page)) || + WARN_ON(!page->mapping) || + WARN_ON(!page->mapping->host) || + WARN_ON(!page_is_devmap_managed(page))) + return false; + + new_fl = lease_alloc(NULL, F_RDLCK, FL_LONGTERM); + if (IS_ERR(new_fl)) + return false; + + /* Ensure page->mapping isn't freed while we look at it */ + /* No locking needed... mm sem is held. 
*/ + inode = page->mapping->host; + + ctx = locks_get_lock_context(inode, F_RDLCK); + percpu_down_read(&file_rwsem); + spin_lock(&ctx->flc_lock); + + existing_fl = find_longterm_lease(inode); + if (!existing_fl) { + existing_fl = new_fl; + locks_insert_lock_ctx(new_fl, &ctx->flc_lease); + } else { + kref_get(&existing_fl->gup_ref); + } + + spin_unlock(&ctx->flc_lock); + percpu_up_read(&file_rwsem); + + if (existing_fl != new_fl) + locks_free_lock(new_fl); + + return true; +} +EXPORT_SYMBOL_GPL(page_set_longterm_lease); + +void page_remove_longterm_lease(struct page *page) +{ + struct file_lock_context *ctx; + struct inode *inode; + struct file_lock *found; + + /* + * We should never be here unless we are a "page cache" page + * And we are a devm managed page + */ + if (WARN_ON(!page) || + WARN_ON(PageAnon(page)) || + WARN_ON(!page->mapping) || + WARN_ON(!page->mapping->host) || + WARN_ON(!page_is_devmap_managed(page))) + return; + + inode = page->mapping->host; + + ctx = locks_get_lock_context(inode, F_RDLCK); + + found = NULL; + percpu_down_read(&file_rwsem); + spin_lock(&ctx->flc_lock); + found = find_longterm_lease(inode); + if (found) + kref_put(&found->gup_ref, release_longterm_lease); + spin_unlock(&ctx->flc_lock); + percpu_up_read(&file_rwsem); +} +EXPORT_SYMBOL_GPL(page_remove_longterm_lease); diff --git a/include/linux/fs.h b/include/linux/fs.h index ace21c6feb19..be2d08080aa5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -1093,6 +1094,7 @@ struct file_lock { int state; /* state of grant or error if -ve */ } afs; } fl_u; + struct kref gup_ref; } __randomize_layout; struct file_lock_context { @@ -1152,6 +1154,8 @@ extern int lease_modify(struct file_lock *, int, struct list_head *); struct files_struct; extern void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files); +bool page_set_longterm_lease(struct page *page); +void 
page_remove_longterm_lease(struct page *page); #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) diff --git a/mm/gup.c b/mm/gup.c index 5ae1dd31a58d..1ee17f2339f7 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -51,6 +51,9 @@ void put_user_page(struct page *page) { page = compound_head(page); + if (page_is_devmap_managed(page)) + page_remove_longterm_lease(page); + /* * For devmap managed pages we need to catch refcount transition from * GUP_PIN_COUNTING_BIAS to 1, when refcount reach one it means the @@ -294,7 +297,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, goto no_page; if (unlikely(flags & FOLL_LONGTERM) && - !mapping_inode_has_longterm(page)) { + !page_set_longterm_lease(page)) { page = ERR_PTR(-EINVAL); goto out; } @@ -1877,7 +1880,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, pages[*nr] = page; if (unlikely(flags & FOLL_LONGTERM) && - !mapping_inode_has_longterm(page)) { + !page_set_longterm_lease(page)) { undo_dev_pagemap(nr, nr_start, pages); return 0; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8819624c740f..6a8c039fe6ff 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -915,9 +915,8 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, // FIXME combine logic if (unlikely(flags & FOLL_LONGTERM)) { WARN_ON_ONCE(PageAnon(page)); - if (!mapping_inode_has_longterm(page)) { + if (!page_set_longterm_lease(page)) return NULL; - } } get_page(page); @@ -1065,9 +1064,8 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, // FIXME combine logic remove Warn if (unlikely(flags & FOLL_LONGTERM)) { WARN_ON_ONCE(PageAnon(page)); - if (!mapping_inode_has_longterm(page)) { + if (!page_set_longterm_lease(page)) return NULL; - } } get_page(page); From patchwork Mon Apr 29 04:53:55 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit 
X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 10921197 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id AB7741575 for ; Mon, 29 Apr 2019 04:55:04 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 9E74C2625B for ; Mon, 29 Apr 2019 04:55:04 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 92711274A3; Mon, 29 Apr 2019 04:55:04 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=unavailable version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 3FFCB2625B for ; Mon, 29 Apr 2019 04:55:04 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727416AbfD2Eyx (ORCPT ); Mon, 29 Apr 2019 00:54:53 -0400 Received: from mga03.intel.com ([134.134.136.65]:28445 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727162AbfD2EyJ (ORCPT ); Mon, 29 Apr 2019 00:54:09 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 28 Apr 2019 21:54:08 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,408,1549958400"; d="scan'208";a="146566299" Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga003.jf.intel.com with ESMTP; 28 Apr 2019 21:54:08 -0700 From: ira.weiny@intel.com To: lsf-pc@lists.linux-foundation.org Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, Dan Williams , Jan Kara , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , John Hubbard , Michal Hocko , 
Ira Weiny Subject: [RFC PATCH 06/10] fs/locks: Add longterm lease traces Date: Sun, 28 Apr 2019 21:53:55 -0700 Message-Id: <20190429045359.8923-7-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190429045359.8923-1-ira.weiny@intel.com> References: <20190429045359.8923-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Ira Weiny --- fs/locks.c | 5 +++++ include/trace/events/filelock.h | 37 ++++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/fs/locks.c b/fs/locks.c index ae508d192223..58c6d7a411b6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2136,6 +2136,8 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) } new->fa_fd = fd; + trace_take_longterm_lease(fl); + error = vfs_setlease(filp, arg, &fl, (void **)&new); if (fl) locks_free_lock(fl); @@ -3118,6 +3120,8 @@ bool page_set_longterm_lease(struct page *page) kref_get(&existing_fl->gup_ref); } + trace_take_longterm_lease(existing_fl); + spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); @@ -3153,6 +3157,7 @@ void page_remove_longterm_lease(struct page *page) percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); found = find_longterm_lease(inode); + trace_release_longterm_lease(found); if (found) kref_put(&found->gup_ref, release_longterm_lease); spin_unlock(&ctx->flc_lock); diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index 4b735923f2ff..c6f39f03cb8b 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -27,7 +27,8 @@ { FL_SLEEP, "FL_SLEEP" }, \ { FL_DOWNGRADE_PENDING, "FL_DOWNGRADE_PENDING" }, \ { FL_UNLOCK_PENDING, "FL_UNLOCK_PENDING" }, \ - { FL_OFDLCK, "FL_OFDLCK" }) + { FL_OFDLCK, "FL_OFDLCK" }, \ + { FL_LONGTERM, "FL_LONGTERM" }) #define show_fl_type(val) \ __print_symbolic(val, \ @@ -238,6 +239,40 
@@ TRACE_EVENT(leases_conflict, show_fl_type(__entry->b_fl_type)) ); +DECLARE_EVENT_CLASS(longterm_lease, + TP_PROTO(struct file_lock *fl), + + TP_ARGS(fl), + + TP_STRUCT__entry( + __field(void *, fl) + __field(void *, owner) + __field(unsigned int, fl_flags) + __field(unsigned int, cnt) + __field(unsigned char, fl_type) + ), + + TP_fast_assign( + __entry->fl = fl; + __entry->owner = fl ? fl->fl_owner : NULL; + __entry->fl_flags = fl ? fl->fl_flags : 0; + __entry->cnt = fl ? kref_read(&fl->gup_ref) : 0; + __entry->fl_type = fl ? fl->fl_type : 0; + ), + + TP_printk("owner=0x%p fl=%p(%d) fl_flags=%s fl_type=%s", + __entry->owner, __entry->fl, __entry->cnt, + show_fl_flags(__entry->fl_flags), + show_fl_type(__entry->fl_type)) +); +DEFINE_EVENT(longterm_lease, take_longterm_lease, + TP_PROTO(struct file_lock *fl), + TP_ARGS(fl)); +DEFINE_EVENT(longterm_lease, release_longterm_lease, + TP_PROTO(struct file_lock *fl), + TP_ARGS(fl)); + + #endif /* _TRACE_FILELOCK_H */ /* This part must be outside protection */ From patchwork Mon Apr 29 04:53:56 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 10921167 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 7974F1398 for ; Mon, 29 Apr 2019 04:54:12 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 6AB102877B for ; Mon, 29 Apr 2019 04:54:12 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 5ED6B28783; Mon, 29 Apr 2019 04:54:12 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org 
(vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 05B8728779 for ; Mon, 29 Apr 2019 04:54:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727249AbfD2EyK (ORCPT ); Mon, 29 Apr 2019 00:54:10 -0400 Received: from mga03.intel.com ([134.134.136.65]:28445 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727168AbfD2EyJ (ORCPT ); Mon, 29 Apr 2019 00:54:09 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 28 Apr 2019 21:54:09 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,408,1549958400"; d="scan'208";a="146566306" Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga003.jf.intel.com with ESMTP; 28 Apr 2019 21:54:08 -0700 From: ira.weiny@intel.com To: lsf-pc@lists.linux-foundation.org Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, Dan Williams , Jan Kara , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , John Hubbard , Michal Hocko , Ira Weiny Subject: [RFC PATCH 07/10] fs/dax: Create function dax_mapping_is_dax() Date: Sun, 28 Apr 2019 21:53:56 -0700 Message-Id: <20190429045359.8923-8-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190429045359.8923-1-ira.weiny@intel.com> References: <20190429045359.8923-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Ira Weiny In order to support longterm lease breaking operations, lease break code in the file systems needs to know if a mapping is DAX. Split out the logic to determine if a mapping is DAX and export it. 
--- fs/dax.c | 23 ++++++++++++++++------- include/linux/dax.h | 6 ++++++ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index ca0671d55aa6..c3a932235e88 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -551,6 +551,21 @@ static void *grab_mapping_entry(struct xa_state *xas, return xa_mk_internal(VM_FAULT_FALLBACK); } +bool dax_mapping_is_dax(struct address_space *mapping) +{ + /* + * In the 'limited' case get_user_pages() for dax is disabled. + */ + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) + return false; + + if (!dax_mapping(mapping) || !mapping_mapped(mapping)) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(dax_mapping_is_dax); + /** * dax_layout_busy_page - find first pinned page in @mapping * @mapping: address space to scan for a page with ref count > 1 @@ -573,13 +588,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) unsigned int scanned = 0; struct page *page = NULL; - /* - * In the 'limited' case get_user_pages() for dax is disabled. 
- */ - if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) - return NULL; - - if (!dax_mapping(mapping) || !mapping_mapped(mapping)) + if (!dax_mapping_is_dax(mapping)) return NULL; /* diff --git a/include/linux/dax.h b/include/linux/dax.h index 0dd316a74a29..78fea21b990e 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -89,6 +89,7 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); int dax_writeback_mapping_range(struct address_space *mapping, struct block_device *bdev, struct writeback_control *wbc); +bool dax_mapping_is_dax(struct address_space *mapping); struct page *dax_layout_busy_page(struct address_space *mapping); dax_entry_t dax_lock_page(struct page *page); void dax_unlock_page(struct page *page, dax_entry_t cookie); @@ -113,6 +114,11 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) return NULL; } +bool dax_mapping_is_dax(struct address_space *mapping) +{ + return false; +} + static inline struct page *dax_layout_busy_page(struct address_space *mapping) { return NULL; From patchwork Mon Apr 29 04:53:57 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 10921191 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 68E3F1398 for ; Mon, 29 Apr 2019 04:54:46 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 5B8F228779 for ; Mon, 29 Apr 2019 04:54:46 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 4FCEB28783; Mon, 29 Apr 2019 04:54:46 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=unavailable version=3.3.1 
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id ECDC228779 for ; Mon, 29 Apr 2019 04:54:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727243AbfD2Eyk (ORCPT ); Mon, 29 Apr 2019 00:54:40 -0400 Received: from mga03.intel.com ([134.134.136.65]:28445 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727230AbfD2EyK (ORCPT ); Mon, 29 Apr 2019 00:54:10 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 28 Apr 2019 21:54:09 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,408,1549958400"; d="scan'208";a="146566310" Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga003.jf.intel.com with ESMTP; 28 Apr 2019 21:54:09 -0700 From: ira.weiny@intel.com To: lsf-pc@lists.linux-foundation.org Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, Dan Williams , Jan Kara , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , John Hubbard , Michal Hocko , Ira Weiny Subject: [RFC PATCH 08/10] mm/gup: fs: Send SIGBUS on truncate of active file Date: Sun, 28 Apr 2019 21:53:57 -0700 Message-Id: <20190429045359.8923-9-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190429045359.8923-1-ira.weiny@intel.com> References: <20190429045359.8923-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Ira Weiny Now that the taking of LONGTERM leases is in place we can now facilitate sending a SIGBUS to process if a file truncate or hole punch is performed and they do not respond by releasing the lease. The standard file lease_break_time is used to time out the LONGTERM lease which is in place on the inode. 
--- fs/ext4/inode.c | 4 ++++ fs/locks.c | 13 +++++++++++-- fs/xfs/xfs_file.c | 4 ++++ include/linux/fs.h | 13 +++++++++++++ 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b32a57bc5d5d..bee456c8c805 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4237,6 +4237,10 @@ int ext4_break_layouts(struct inode *inode) if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem))) return -EINVAL; + /* Break longterm leases */ + if (dax_mapping_is_dax(inode->i_mapping)) + break_longterm(inode); + do { page = dax_layout_busy_page(inode->i_mapping); if (!page) diff --git a/fs/locks.c b/fs/locks.c index 58c6d7a411b6..c77eee081d11 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1580,6 +1580,7 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose) { struct file_lock_context *ctx = inode->i_flctx; struct file_lock *fl, *tmp; + struct task_struct *tsk; lockdep_assert_held(&ctx->flc_lock); @@ -1587,8 +1588,16 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose) trace_time_out_leases(inode, fl); if (past_time(fl->fl_downgrade_time)) lease_modify(fl, F_RDLCK, dispose); - if (past_time(fl->fl_break_time)) - lease_modify(fl, F_UNLCK, dispose); + if (past_time(fl->fl_break_time)) { + if (fl->fl_flags & FL_LONGTERM) { + tsk = find_task_by_vpid(fl->fl_pid); + fl->fl_break_time = 1 + jiffies + lease_break_time * HZ; + lease_modify_longterm(fl, F_UNLCK, dispose); + kill_pid(tsk->thread_pid, SIGBUS, 0); + } else { + lease_modify(fl, F_UNLCK, dispose); + } + } } } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 1f2e2845eb76..ebd310f3ae65 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -739,6 +739,10 @@ xfs_break_dax_layouts( ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL)); + /* Break longterm leases */ + if (dax_mapping_is_dax(inode->i_mapping)) + break_longterm(inode); + page = dax_layout_busy_page(inode->i_mapping); if (!page) return 0; diff --git a/include/linux/fs.h 
b/include/linux/fs.h index be2d08080aa5..0e8b21240a71 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2459,6 +2459,14 @@ static inline int break_layout(struct inode *inode, bool wait) return 0; } +static inline int break_longterm(struct inode *inode) +{ + smp_mb(); + if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) + return __break_lease(inode, O_WRONLY, FL_LONGTERM); + return 0; +} + #else /* !CONFIG_FILE_LOCKING */ static inline int break_lease(struct inode *inode, unsigned int mode) { @@ -2486,6 +2494,11 @@ static inline int break_layout(struct inode *inode, bool wait) return 0; } +static inline int break_longterm(struct inode *inode, bool wait) +{ + return 0; +} + #endif /* CONFIG_FILE_LOCKING */ /* fs/open.c */ From patchwork Mon Apr 29 04:53:58 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 10921183 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DAE821398 for ; Mon, 29 Apr 2019 04:54:30 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id CC5F02877B for ; Mon, 29 Apr 2019 04:54:30 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id C066A28783; Mon, 29 Apr 2019 04:54:30 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 61E1528779 for ; Mon, 29 Apr 2019 04:54:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727364AbfD2Ey3 (ORCPT ); Mon, 29 Apr 2019 
00:54:29 -0400 Received: from mga03.intel.com ([134.134.136.65]:28446 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727243AbfD2EyK (ORCPT ); Mon, 29 Apr 2019 00:54:10 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 28 Apr 2019 21:54:10 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,408,1549958400"; d="scan'208";a="146566315" Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga003.jf.intel.com with ESMTP; 28 Apr 2019 21:54:09 -0700 From: ira.weiny@intel.com To: lsf-pc@lists.linux-foundation.org Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, Dan Williams , Jan Kara , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , John Hubbard , Michal Hocko , Ira Weiny Subject: [RFC PATCH 09/10] fs/locks: Add tracepoint for SIGBUS on LONGTERM expiration Date: Sun, 28 Apr 2019 21:53:58 -0700 Message-Id: <20190429045359.8923-10-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190429045359.8923-1-ira.weiny@intel.com> References: <20190429045359.8923-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Ira Weiny --- fs/locks.c | 1 + include/trace/events/filelock.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/locks.c b/fs/locks.c index c77eee081d11..42b96bfc71fa 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1592,6 +1592,7 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose) if (fl->fl_flags & FL_LONGTERM) { tsk = find_task_by_vpid(fl->fl_pid); fl->fl_break_time = 1 + jiffies + lease_break_time * HZ; + trace_longterm_sigbus(fl); lease_modify_longterm(fl, F_UNLCK, dispose); kill_pid(tsk->thread_pid, SIGBUS, 0); } else { diff --git 
a/include/trace/events/filelock.h b/include/trace/events/filelock.h index c6f39f03cb8b..626386dbe599 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -271,7 +271,9 @@ DEFINE_EVENT(longterm_lease, take_longterm_lease, DEFINE_EVENT(longterm_lease, release_longterm_lease, TP_PROTO(struct file_lock *fl), TP_ARGS(fl)); - +DEFINE_EVENT(longterm_lease, longterm_sigbus, + TP_PROTO(struct file_lock *fl), + TP_ARGS(fl)); #endif /* _TRACE_FILELOCK_H */ From patchwork Mon Apr 29 04:53:59 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 10921189 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6D7301398 for ; Mon, 29 Apr 2019 04:54:40 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 6049028779 for ; Mon, 29 Apr 2019 04:54:40 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 53DBC28783; Mon, 29 Apr 2019 04:54:40 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=unavailable version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id EE21428779 for ; Mon, 29 Apr 2019 04:54:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727354AbfD2Ey2 (ORCPT ); Mon, 29 Apr 2019 00:54:28 -0400 Received: from mga03.intel.com ([134.134.136.65]:28447 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727261AbfD2EyL (ORCPT ); Mon, 29 Apr 2019 00:54:11 -0400 X-Amp-Result: SKIPPED(no attachment in message) 
X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 28 Apr 2019 21:54:10 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,408,1549958400"; d="scan'208";a="146566319" Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga003.jf.intel.com with ESMTP; 28 Apr 2019 21:54:10 -0700 From: ira.weiny@intel.com To: lsf-pc@lists.linux-foundation.org Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, Dan Williams , Jan Kara , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , John Hubbard , Michal Hocko , Ira Weiny Subject: [RFC PATCH 10/10] mm/gup: Remove FOLL_LONGTERM DAX exclusion Date: Sun, 28 Apr 2019 21:53:59 -0700 Message-Id: <20190429045359.8923-11-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190429045359.8923-1-ira.weiny@intel.com> References: <20190429045359.8923-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Ira Weiny Now that there is a mechanism for users to safely take LONGTERM pins on FS DAX pages, remove the FS DAX exclusion from GUP with FOLL_LONGTERM. 
Special processing remains in effect for CONFIG_CMA --- mm/gup.c | 65 ++++++-------------------------------------------------- 1 file changed, 6 insertions(+), 59 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 1ee17f2339f7..cf6863422cb9 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1324,26 +1324,6 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, } EXPORT_SYMBOL(get_user_pages_remote); -#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA) -static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages) -{ - long i; - struct vm_area_struct *vma_prev = NULL; - - for (i = 0; i < nr_pages; i++) { - struct vm_area_struct *vma = vmas[i]; - - if (vma == vma_prev) - continue; - - vma_prev = vma; - - if (vma_is_fsdax(vma)) - return true; - } - return false; -} - #ifdef CONFIG_CMA static struct page *new_non_cma_page(struct page *page, unsigned long private) { @@ -1474,18 +1454,6 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk, return nr_pages; } -#else -static long check_and_migrate_cma_pages(struct task_struct *tsk, - struct mm_struct *mm, - unsigned long start, - unsigned long nr_pages, - struct page **pages, - struct vm_area_struct **vmas, - unsigned int gup_flags) -{ - return nr_pages; -} -#endif /* * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which @@ -1499,49 +1467,28 @@ static long __gup_longterm_locked(struct task_struct *tsk, struct vm_area_struct **vmas, unsigned int gup_flags) { - struct vm_area_struct **vmas_tmp = vmas; unsigned long flags = 0; - long rc, i; + long rc; - if (gup_flags & FOLL_LONGTERM) { - if (!pages) - return -EINVAL; - - if (!vmas_tmp) { - vmas_tmp = kcalloc(nr_pages, - sizeof(struct vm_area_struct *), - GFP_KERNEL); - if (!vmas_tmp) - return -ENOMEM; - } + if (flags & FOLL_LONGTERM) flags = memalloc_nocma_save(); - } rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, - vmas_tmp, NULL, gup_flags); + vmas, NULL, gup_flags); if (gup_flags & FOLL_LONGTERM) { 
memalloc_nocma_restore(flags); if (rc < 0) goto out; - if (check_dax_vmas(vmas_tmp, rc)) { - for (i = 0; i < rc; i++) - put_page(pages[i]); - rc = -EOPNOTSUPP; - goto out; - } - rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages, - vmas_tmp, gup_flags); + vmas, gup_flags); } out: - if (vmas_tmp != vmas) - kfree(vmas_tmp); return rc; } -#else /* !CONFIG_FS_DAX && !CONFIG_CMA */ +#else /* !CONFIG_CMA */ static __always_inline long __gup_longterm_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, @@ -1553,7 +1500,7 @@ static __always_inline long __gup_longterm_locked(struct task_struct *tsk, return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, NULL, flags); } -#endif /* CONFIG_FS_DAX || CONFIG_CMA */ +#endif /* CONFIG_CMA */ /* * This is the same as get_user_pages_remote(), just with a