diff mbox series

[6/6] cifs: do not cache leased directories for longer than 30 seconds

Message ID 20220824002756.3659568-7-lsahlber@redhat.com (mailing list archive)
State New, archived
Headers show
Series [1/6] cifs: Make tcon contain a wrapper structure cached_fids instead of cached_fid | expand

Commit Message

Ronnie Sahlberg Aug. 24, 2022, 12:27 a.m. UTC
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/cached_dir.c | 32 ++++++++++++++++++++++++++++++--
 fs/cifs/cached_dir.h |  2 ++
 2 files changed, 32 insertions(+), 2 deletions(-)

Comments

Tom Talpey Aug. 24, 2022, 1:48 p.m. UTC | #1
On 8/23/2022 8:27 PM, Ronnie Sahlberg wrote:
> Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
> ---
>   fs/cifs/cached_dir.c | 32 ++++++++++++++++++++++++++++++--
>   fs/cifs/cached_dir.h |  2 ++
>   2 files changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/cifs/cached_dir.c b/fs/cifs/cached_dir.c
> index f4d7700827b3..1c4a409bcb67 100644
> --- a/fs/cifs/cached_dir.c
> +++ b/fs/cifs/cached_dir.c
> @@ -14,6 +14,7 @@
>   
>   static struct cached_fid *init_cached_dir(const char *path);
>   static void free_cached_dir(struct cached_fid *cfid);
> +static void smb2_cached_lease_timeout(struct work_struct *work);
>   
>   /*
>    * Locking and reference count for cached directory handles:
> @@ -321,6 +322,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>   	cfid->tcon = tcon;
>   	cfid->time = jiffies;
>   	cfid->is_open = true;
> +	queue_delayed_work(cifsiod_wq, &cfid->lease_timeout, HZ * 30);

Wouldn't it be more efficient to implement a laundromat? There
will be MAX_CACHED_FIDS of these delayed workers, and the
timing does not need to be precise (right?).

Is it worth considering making HZ*30 into a tunable?

Tom.

> +	cfid->has_timeout = true;
>   	cfid->has_lease = true;
>   
>   oshr_free:
> @@ -447,6 +450,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
>   		list_add(&cfid->entry, &entry);
>   		cfids->num_entries--;
>   		cfid->is_open = false;
> +		cfid->has_timeout = false;
>   		/* To prevent race with smb2_cached_lease_break() */
>   		kref_get(&cfid->refcount);
>   	}
> @@ -455,6 +459,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
>   	list_for_each_entry_safe(cfid, q, &entry, entry) {
>   		cfid->on_list = false;
>   		list_del(&cfid->entry);
> +		cancel_delayed_work_sync(&cfid->lease_timeout);
>   		cancel_work_sync(&cfid->lease_break);
>   		if (cfid->has_lease) {
>   			/*
> @@ -477,12 +482,34 @@ smb2_cached_lease_break(struct work_struct *work)
>   	struct cached_fid *cfid = container_of(work,
>   				struct cached_fid, lease_break);
>   
> +	cancel_delayed_work_sync(&cfid->lease_timeout);
>   	spin_lock(&cfid->cfids->cfid_list_lock);
> +	if (!cfid->has_lease) {
> +		spin_unlock(&cfid->cfids->cfid_list_lock);
> +		return;
> +	}
>   	cfid->has_lease = false;
>   	spin_unlock(&cfid->cfids->cfid_list_lock);
>   	kref_put(&cfid->refcount, smb2_close_cached_fid);
>   }
>   
> +static void
> +smb2_cached_lease_timeout(struct work_struct *work)
> +{
> +	struct cached_fid *cfid = container_of(work,
> +				struct cached_fid, lease_timeout.work);
> +
> +	spin_lock(&cfid->cfids->cfid_list_lock);
> +	if (!cfid->has_timeout || !cfid->on_list) {
> +		spin_unlock(&cfid->cfids->cfid_list_lock);
> +		return;
> +	}
> +	cfid->has_timeout = false;
> +	spin_unlock(&cfid->cfids->cfid_list_lock);
> +
> +	queue_work(cifsiod_wq, &cfid->lease_break);
> +}
> +
>   int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
>   {
>   	struct cached_fids *cfids = tcon->cfids;
> @@ -506,9 +533,9 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
>   			cfid->on_list = false;
>   			cfids->num_entries--;
>   
> -			queue_work(cifsiod_wq,
> -				   &cfid->lease_break);
> +			cfid->has_timeout = false;
>   			spin_unlock(&cfids->cfid_list_lock);
> +			queue_work(cifsiod_wq, &cfid->lease_break);
>   			return true;
>   		}
>   	}
> @@ -530,6 +557,7 @@ static struct cached_fid *init_cached_dir(const char *path)
>   	}
>   
>   	INIT_WORK(&cfid->lease_break, smb2_cached_lease_break);
> +	INIT_DELAYED_WORK(&cfid->lease_timeout, smb2_cached_lease_timeout);
>   	INIT_LIST_HEAD(&cfid->entry);
>   	INIT_LIST_HEAD(&cfid->dirents.entries);
>   	mutex_init(&cfid->dirents.de_mutex);
> diff --git a/fs/cifs/cached_dir.h b/fs/cifs/cached_dir.h
> index e536304ca2ce..813cd30f7ca3 100644
> --- a/fs/cifs/cached_dir.h
> +++ b/fs/cifs/cached_dir.h
> @@ -35,6 +35,7 @@ struct cached_fid {
>   	struct cached_fids *cfids;
>   	const char *path;
>   	bool has_lease:1;
> +	bool has_timeout:1;
>   	bool is_open:1;
>   	bool on_list:1;
>   	bool file_all_info_is_valid:1;
> @@ -45,6 +46,7 @@ struct cached_fid {
>   	struct cifs_tcon *tcon;
>   	struct dentry *dentry;
>   	struct work_struct lease_break;
> +	struct delayed_work lease_timeout;
>   	struct smb2_file_all_info file_all_info;
>   	struct cached_dirents dirents;
>   };
ronnie sahlberg Aug. 25, 2022, 4:39 a.m. UTC | #2
On Wed, 24 Aug 2022 at 23:58, Tom Talpey <tom@talpey.com> wrote:
>
> On 8/23/2022 8:27 PM, Ronnie Sahlberg wrote:
> > Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
> > ---
> >   fs/cifs/cached_dir.c | 32 ++++++++++++++++++++++++++++++--
> >   fs/cifs/cached_dir.h |  2 ++
> >   2 files changed, 32 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/cifs/cached_dir.c b/fs/cifs/cached_dir.c
> > index f4d7700827b3..1c4a409bcb67 100644
> > --- a/fs/cifs/cached_dir.c
> > +++ b/fs/cifs/cached_dir.c
> > @@ -14,6 +14,7 @@
> >
> >   static struct cached_fid *init_cached_dir(const char *path);
> >   static void free_cached_dir(struct cached_fid *cfid);
> > +static void smb2_cached_lease_timeout(struct work_struct *work);
> >
> >   /*
> >    * Locking and reference count for cached directory handles:
> > @@ -321,6 +322,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >       cfid->tcon = tcon;
> >       cfid->time = jiffies;
> >       cfid->is_open = true;
> > +     queue_delayed_work(cifsiod_wq, &cfid->lease_timeout, HZ * 30);
>
> Wouldn't it be more efficient to implement a laundromat? There
> will be MAX_CACHED_FIDS of these delayed workers, and the
> timing does not need to be precise (right?).

I was thinking that first but thought it would be easier to just use
delayed events on the pre-existing
work-queue. However, having two potentially racing work items turned
out to be as complex as
just having a separate thread for the tcon.
I will drop this patch for now and re-do it with a laundromat instead.
It will be more efficient
and I think it will make the logic a bit simpler too.

>
> Is it worth considering making HZ*30 into a tunable?

Maybe. I can add that when I re-spin this patch.
Rescinding leases is super hard to get right. Rescind them too quickly
and you miss out on potential performance.
Rescind them too late and you hurt performance, at least for other clients.

Right now the caching is binary. It is either enabled, or fully disabled.
I would like timeouts like this to be auto-adjusted by the
module itself, because setting it "correctly"
or "optimally" will probably be super hard, if not impossible, for the
average sysadmin.
Heck, I don't think I could even set a parameter like this correctly.
And that is even not taking into account that workloads change
dynamically over time so any fixed value will only
be "correct" for some/part of the time anyway. Sometimes these changes
occur over just a few minutes/hours so a fixed value is impossible to
come up with.

I think it would be possible to have this be automatically and
dynamically adjusted.
I want to measure things like "how often do we re-open a directory
while holding a lease",   "how often is the lease broken by the
server"
and try to keep the first as high as possible while also have the
second as low, or zero.
And have this adjust automatically at runtime.

>
> Tom.
>
> > +     cfid->has_timeout = true;
> >       cfid->has_lease = true;
> >
> >   oshr_free:
> > @@ -447,6 +450,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
> >               list_add(&cfid->entry, &entry);
> >               cfids->num_entries--;
> >               cfid->is_open = false;
> > +             cfid->has_timeout = false;
> >               /* To prevent race with smb2_cached_lease_break() */
> >               kref_get(&cfid->refcount);
> >       }
> > @@ -455,6 +459,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
> >       list_for_each_entry_safe(cfid, q, &entry, entry) {
> >               cfid->on_list = false;
> >               list_del(&cfid->entry);
> > +             cancel_delayed_work_sync(&cfid->lease_timeout);
> >               cancel_work_sync(&cfid->lease_break);
> >               if (cfid->has_lease) {
> >                       /*
> > @@ -477,12 +482,34 @@ smb2_cached_lease_break(struct work_struct *work)
> >       struct cached_fid *cfid = container_of(work,
> >                               struct cached_fid, lease_break);
> >
> > +     cancel_delayed_work_sync(&cfid->lease_timeout);
> >       spin_lock(&cfid->cfids->cfid_list_lock);
> > +     if (!cfid->has_lease) {
> > +             spin_unlock(&cfid->cfids->cfid_list_lock);
> > +             return;
> > +     }
> >       cfid->has_lease = false;
> >       spin_unlock(&cfid->cfids->cfid_list_lock);
> >       kref_put(&cfid->refcount, smb2_close_cached_fid);
> >   }
> >
> > +static void
> > +smb2_cached_lease_timeout(struct work_struct *work)
> > +{
> > +     struct cached_fid *cfid = container_of(work,
> > +                             struct cached_fid, lease_timeout.work);
> > +
> > +     spin_lock(&cfid->cfids->cfid_list_lock);
> > +     if (!cfid->has_timeout || !cfid->on_list) {
> > +             spin_unlock(&cfid->cfids->cfid_list_lock);
> > +             return;
> > +     }
> > +     cfid->has_timeout = false;
> > +     spin_unlock(&cfid->cfids->cfid_list_lock);
> > +
> > +     queue_work(cifsiod_wq, &cfid->lease_break);
> > +}
> > +
> >   int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
> >   {
> >       struct cached_fids *cfids = tcon->cfids;
> > @@ -506,9 +533,9 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
> >                       cfid->on_list = false;
> >                       cfids->num_entries--;
> >
> > -                     queue_work(cifsiod_wq,
> > -                                &cfid->lease_break);
> > +                     cfid->has_timeout = false;
> >                       spin_unlock(&cfids->cfid_list_lock);
> > +                     queue_work(cifsiod_wq, &cfid->lease_break);
> >                       return true;
> >               }
> >       }
> > @@ -530,6 +557,7 @@ static struct cached_fid *init_cached_dir(const char *path)
> >       }
> >
> >       INIT_WORK(&cfid->lease_break, smb2_cached_lease_break);
> > +     INIT_DELAYED_WORK(&cfid->lease_timeout, smb2_cached_lease_timeout);
> >       INIT_LIST_HEAD(&cfid->entry);
> >       INIT_LIST_HEAD(&cfid->dirents.entries);
> >       mutex_init(&cfid->dirents.de_mutex);
> > diff --git a/fs/cifs/cached_dir.h b/fs/cifs/cached_dir.h
> > index e536304ca2ce..813cd30f7ca3 100644
> > --- a/fs/cifs/cached_dir.h
> > +++ b/fs/cifs/cached_dir.h
> > @@ -35,6 +35,7 @@ struct cached_fid {
> >       struct cached_fids *cfids;
> >       const char *path;
> >       bool has_lease:1;
> > +     bool has_timeout:1;
> >       bool is_open:1;
> >       bool on_list:1;
> >       bool file_all_info_is_valid:1;
> > @@ -45,6 +46,7 @@ struct cached_fid {
> >       struct cifs_tcon *tcon;
> >       struct dentry *dentry;
> >       struct work_struct lease_break;
> > +     struct delayed_work lease_timeout;
> >       struct smb2_file_all_info file_all_info;
> >       struct cached_dirents dirents;
> >   };
Tom Talpey Aug. 25, 2022, 3:29 p.m. UTC | #3
On 8/25/2022 12:39 AM, ronnie sahlberg wrote:
> On Wed, 24 Aug 2022 at 23:58, Tom Talpey <tom@talpey.com> wrote:
>> Is it worth considering making HZ*30 into a tunable?
> 
> Maybe. I can add that when I re-spin this patch.
> Rescinding leases is super hard to get right. Rescind them too quickly
> and you miss out on potential performance.
> Rescind them too late and you hurt performance, at least for other clients.
> 
> Right now the caching is binary. It is either enabled, or fully disabled.
> I would like timeouts like this to be auto-adjusted by the
> module itself, because setting it "correctly"
> or "optimally" will probably be super hard, if not impossible, for the
> average sysadmin.
> Heck, I don't think I could even set a parameter like this correctly.
> And that is even not taking into account that workloads change
> dynamically over time so any fixed value will only
> be "correct" for some/part of the time anyway. Sometimes these changes
> occur over just a few minutes/hours so a fixed value is impossible to
> come up with.
> 
> I think it would be possible to have this be automatically and
> dynamically adjusted.
> I want to measure things like "how often do we re-open a directory
> while holding a lease",   "how often is the lease broken by the
> server"
> and try to keep the first as high as possible while also have the
> second as low, or zero.
> And have this adjust automatically at runtime.

Unfortunately I think the problem is that the lease behavior is
dependent on the workload, and whether the app is sharing files.
If the lease is never recalled, obviously the caching can be
"forever". But apps banging heads over a set of shared files in
a shared directory will be nearly pointless. I'm not sure how
you can sanely automate that detection.

However, it seems to me that 30 seconds is pretty arbitrary.
It might help speed up a "tar x" or "tar c" but is it broadly
worthwhile? Would 5 seconds do the same? Dunno.

Tom.
diff mbox series

Patch

diff --git a/fs/cifs/cached_dir.c b/fs/cifs/cached_dir.c
index f4d7700827b3..1c4a409bcb67 100644
--- a/fs/cifs/cached_dir.c
+++ b/fs/cifs/cached_dir.c
@@ -14,6 +14,7 @@ 
 
 static struct cached_fid *init_cached_dir(const char *path);
 static void free_cached_dir(struct cached_fid *cfid);
+static void smb2_cached_lease_timeout(struct work_struct *work);
 
 /*
  * Locking and reference count for cached directory handles:
@@ -321,6 +322,8 @@  int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	cfid->tcon = tcon;
 	cfid->time = jiffies;
 	cfid->is_open = true;
+	queue_delayed_work(cifsiod_wq, &cfid->lease_timeout, HZ * 30);
+	cfid->has_timeout = true;
 	cfid->has_lease = true;
 
 oshr_free:
@@ -447,6 +450,7 @@  void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
 		list_add(&cfid->entry, &entry);
 		cfids->num_entries--;
 		cfid->is_open = false;
+		cfid->has_timeout = false;
 		/* To prevent race with smb2_cached_lease_break() */
 		kref_get(&cfid->refcount);
 	}
@@ -455,6 +459,7 @@  void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
 	list_for_each_entry_safe(cfid, q, &entry, entry) {
 		cfid->on_list = false;
 		list_del(&cfid->entry);
+		cancel_delayed_work_sync(&cfid->lease_timeout);
 		cancel_work_sync(&cfid->lease_break);
 		if (cfid->has_lease) {
 			/*
@@ -477,12 +482,34 @@  smb2_cached_lease_break(struct work_struct *work)
 	struct cached_fid *cfid = container_of(work,
 				struct cached_fid, lease_break);
 
+	cancel_delayed_work_sync(&cfid->lease_timeout);
 	spin_lock(&cfid->cfids->cfid_list_lock);
+	if (!cfid->has_lease) {
+		spin_unlock(&cfid->cfids->cfid_list_lock);
+		return;
+	}
 	cfid->has_lease = false;
 	spin_unlock(&cfid->cfids->cfid_list_lock);
 	kref_put(&cfid->refcount, smb2_close_cached_fid);
 }
 
+static void
+smb2_cached_lease_timeout(struct work_struct *work)
+{
+	struct cached_fid *cfid = container_of(work,
+				struct cached_fid, lease_timeout.work);
+
+	spin_lock(&cfid->cfids->cfid_list_lock);
+	if (!cfid->has_timeout || !cfid->on_list) {
+		spin_unlock(&cfid->cfids->cfid_list_lock);
+		return;
+	}
+	cfid->has_timeout = false;
+	spin_unlock(&cfid->cfids->cfid_list_lock);
+
+	queue_work(cifsiod_wq, &cfid->lease_break);
+}
+
 int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
 {
 	struct cached_fids *cfids = tcon->cfids;
@@ -506,9 +533,9 @@  int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
 			cfid->on_list = false;
 			cfids->num_entries--;
 
-			queue_work(cifsiod_wq,
-				   &cfid->lease_break);
+			cfid->has_timeout = false;
 			spin_unlock(&cfids->cfid_list_lock);
+			queue_work(cifsiod_wq, &cfid->lease_break);
 			return true;
 		}
 	}
@@ -530,6 +557,7 @@  static struct cached_fid *init_cached_dir(const char *path)
 	}
 
 	INIT_WORK(&cfid->lease_break, smb2_cached_lease_break);
+	INIT_DELAYED_WORK(&cfid->lease_timeout, smb2_cached_lease_timeout);
 	INIT_LIST_HEAD(&cfid->entry);
 	INIT_LIST_HEAD(&cfid->dirents.entries);
 	mutex_init(&cfid->dirents.de_mutex);
diff --git a/fs/cifs/cached_dir.h b/fs/cifs/cached_dir.h
index e536304ca2ce..813cd30f7ca3 100644
--- a/fs/cifs/cached_dir.h
+++ b/fs/cifs/cached_dir.h
@@ -35,6 +35,7 @@  struct cached_fid {
 	struct cached_fids *cfids;
 	const char *path;
 	bool has_lease:1;
+	bool has_timeout:1;
 	bool is_open:1;
 	bool on_list:1;
 	bool file_all_info_is_valid:1;
@@ -45,6 +46,7 @@  struct cached_fid {
 	struct cifs_tcon *tcon;
 	struct dentry *dentry;
 	struct work_struct lease_break;
+	struct delayed_work lease_timeout;
 	struct smb2_file_all_info file_all_info;
 	struct cached_dirents dirents;
 };