diff mbox

[linux-cifs-client] Cannot allocate memory

Message ID 49B687D8.2060600@etersoft.ru (mailing list archive)
State New, archived
Headers show

Commit Message

Pavel Shilovsky March 10, 2009, 3:31 p.m. UTC
Hello!

This patch resolves problem "Cannot allocate memory" during loading 
module when we do unload/load quickly. During unloading the 
cifs_demultiplex_thread doesn't have enough time for clearing cache and 
kmem_cache_destroy fails. Then during loading module it fails with 
kmem_cache_create: duplicate cache cifs_request.

This script can show this bug:

rmmod cifs

for i in `seq 1 20`;
do {
        echo "trying to mount " $i
        insmod ./cifs.ko
        cifsmount [имя шары] [путь] -onoperm
        umount [путь]
        rmmod cifs
};
done



And it fails on 2nd iteration.

My patch fixes it.



--
Best regards,
Pavel Shilovsky.

Comments

Jeff Layton March 10, 2009, 7:01 p.m. UTC | #1
On Tue, 10 Mar 2009 18:31:36 +0300
Pavel Shilovsky <piastry@etersoft.ru> wrote:

> Hello!
> 
> This patch resolves problem "Cannot allocate memory" during loading 
> module when we do unload/load quickly. During unloading the 
> cifs_demultiplex_thread doesn't have enough time for clearing cache and 
> kmem_cache_destroy fails. Then during loading module it fails with 
> kmem_cache_create: duplicate cache cifs_request.
> 
> This script can show this bug:
> 
> rmmod cifs
> 
> for i in `seq 1 20`;
> do {
>         echo "trying to mount " $i
>         insmod ./cifs.ko
>         cifsmount [имя шары] [путь] -onoperm
>         umount [путь]
>         rmmod cifs
> };
> done
> 
> 
> 
> And it fails on 2nd iteration.
> 
> My patch fixes it.
> 
> diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
> index 1ae6314..18ba45a 100644
> --- a/fs/cifs/cifsglob.h
> +++ b/fs/cifs/cifsglob.h
> @@ -176,6 +176,7 @@ struct TCP_Server_Info {
>         struct mac_key mac_signing_key;
>         char ntlmv2_hash[16];
>         unsigned long lstrp; /* when we got last response from this 
> server */
> +       int running;
>  };
> 
>  /*
> diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
> index f254235..39daaf2 100644
> --- a/fs/cifs/connect.c
> +++ b/fs/cifs/connect.c
> @@ -347,6 +347,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
>         bool isMultiRsp;
>         int reconnect;
> 
> +       server->running = 1;
>         current->flags |= PF_MEMALLOC;
>         cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
> 
> @@ -758,6 +759,8 @@ multi_t2_fnd:
> 
>         kfree(server->hostname);
>         task_to_wake = xchg(&server->tsk, NULL);
> +       server->running = 0;
> +       msleep(20);
>         kfree(server);
> 
>         length = atomic_dec_return(&tcpSesAllocCount);
> @@ -1408,6 +1411,9 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
>         task = xchg(&server->tsk, NULL);
>         if (task)
>                 force_sig(SIGKILL, task);
> +
> +       while(server->running == 1)
> +               msleep(10);
>  }
> 
>  static struct cifsSesInfo *
> 

On what kernel version are you reproducing this? Does it have this patch?

commit 0468a2cf914e79442b8309ce62e3f861599d8cab
Author: Jeff Layton <jlayton@redhat.com>
Date:   Mon Dec 1 07:09:35 2008 -0500

    cifs: take module reference when starting cifsd
    
    cifsd can outlive the last cifs mount. We need to hold a module
    reference until it exits to prevent someone from unplugging
    the module until we're ready.
    
    Signed-off-by: Jeff Layton <jlayton@redhat.com>
    Signed-off-by: Steve French <sfrench@us.ibm.com>
Pavel Shilovsky March 10, 2009, 9:46 p.m. UTC | #2
On Tuesday 10 March 2009 22:01:28 you wrote:
> On Tue, 10 Mar 2009 18:31:36 +0300
>
> Pavel Shilovsky <piastry@etersoft.ru> wrote:
> > Hello!
> >
> > This patch resolves problem "Cannot allocate memory" during loading
> > module when we do unload/load quickly. During unloading the
> > cifs_demultiplex_thread doesn't have enough time for clearing cache and
> > kmem_cache_destroy fails. Then during loading module it fails with
> > kmem_cache_create: duplicate cache cifs_request.
> >
> > This script can show this bug:
> >

> > rmmod cifs
> >
> > for i in `seq 1 20`;
> > do {
> >         echo "trying to mount " $i
> >         insmod ./cifs.ko
> >         cifsmount [имя шары] [путь] -onoperm
Sorry, I forgot to change this line to:
              cifsmount [share name] [path] -onoperm
> >         umount [путь]
> >         rmmod cifs
> > };
> > done
> >
> >
> >
> > And it fails on 2nd iteration.
> >
> > My patch fixes it.
> >
> > diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
> > index 1ae6314..18ba45a 100644
> > --- a/fs/cifs/cifsglob.h
> > +++ b/fs/cifs/cifsglob.h
> > @@ -176,6 +176,7 @@ struct TCP_Server_Info {
> >         struct mac_key mac_signing_key;
> >         char ntlmv2_hash[16];
> >         unsigned long lstrp; /* when we got last response from this
> > server */
> > +       int running;
> >  };
> >
> >  /*
> > diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
> > index f254235..39daaf2 100644
> > --- a/fs/cifs/connect.c
> > +++ b/fs/cifs/connect.c
> > @@ -347,6 +347,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info
> > *server) bool isMultiRsp;
> >         int reconnect;
> >
> > +       server->running = 1;
> >         current->flags |= PF_MEMALLOC;
> >         cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
> >
> > @@ -758,6 +759,8 @@ multi_t2_fnd:
> >
> >         kfree(server->hostname);
> >         task_to_wake = xchg(&server->tsk, NULL);
> > +       server->running = 0;
> > +       msleep(20);
> >         kfree(server);
> >
> >         length = atomic_dec_return(&tcpSesAllocCount);
> > @@ -1408,6 +1411,9 @@ cifs_put_tcp_session(struct TCP_Server_Info
> > *server) task = xchg(&server->tsk, NULL);
> >         if (task)
> >                 force_sig(SIGKILL, task);
> > +
> > +       while(server->running == 1)
> > +               msleep(10);
> >  }
> >
> >  static struct cifsSesInfo *
>
> On what kernel version are you reproducing this? Does it have this patch?
>
> commit 0468a2cf914e79442b8309ce62e3f861599d8cab
> Author: Jeff Layton <jlayton@redhat.com>
> Date:   Mon Dec 1 07:09:35 2008 -0500
>
>     cifs: take module reference when starting cifsd
>
>     cifsd can outlive the last cifs mount. We need to hold a module
>     reference until it exits to prevent someone from unplugging
>     the module until we're ready.
>
>     Signed-off-by: Jeff Layton <jlayton@redhat.com>
>     Signed-off-by: Steve French <sfrench@us.ibm.com>
I have 2.6.27 version of kernel and it doesn't contain this code. I added it 
and tested.
Result: Umounting share finish and when I try to do "rmmod cifs" it says that 
module is in use. I have to wait some time to do it successfully.

My patch provides another behaviour: when umounting share finishes I can remove 
module at once.
Jeff Layton March 10, 2009, 11:11 p.m. UTC | #3
On Wed, 11 Mar 2009 00:43:28 +0300
Pavel Shilovsky <piastry@etersoft.ru> wrote:

> On Tuesday 10 March 2009 22:01:28 you wrote:
> > On Tue, 10 Mar 2009 18:31:36 +0300
> >
> > Pavel Shilovsky <piastry@etersoft.ru> wrote:
> > > Hello!
> > >
> > > This patch resolves problem "Cannot allocate memory" during loading
> > > module when we do unload/load quickly. During unloading the
> > > cifs_demultiplex_thread doesn't have enough time for clearing cache and
> > > kmem_cache_destroy fails. Then during loading module it fails with
> > > kmem_cache_create: duplicate cache cifs_request.
> > >
> > > This script can show this bug:
> > >
> 
> > > rmmod cifs
> > >
> > > for i in `seq 1 20`;
> > > do {
> > >         echo "trying to mount " $i
> > >         insmod ./cifs.ko
> > >         cifsmount [имя шары] [путь] -onoperm
> Sorry, I forgot to change this line to:
> 	      cifsmount [share name] [path] -onoperm
> > >         umount [путь]
> > >         rmmod cifs
> > > };
> > > done
> > >
> > >
> > >
> > > And it fails on 2nd iteration.
> > >
> > > My patch fixes it.
> > >
> > > diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
> > > index 1ae6314..18ba45a 100644
> > > --- a/fs/cifs/cifsglob.h
> > > +++ b/fs/cifs/cifsglob.h
> > > @@ -176,6 +176,7 @@ struct TCP_Server_Info {
> > >         struct mac_key mac_signing_key;
> > >         char ntlmv2_hash[16];
> > >         unsigned long lstrp; /* when we got last response from this
> > > server */
> > > +       int running;
> > >  };
> > >
> > >  /*
> > > diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
> > > index f254235..39daaf2 100644
> > > --- a/fs/cifs/connect.c
> > > +++ b/fs/cifs/connect.c
> > > @@ -347,6 +347,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info
> > > *server) bool isMultiRsp;
> > >         int reconnect;
> > >
> > > +       server->running = 1;
> > >         current->flags |= PF_MEMALLOC;
> > >         cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
> > >
> > > @@ -758,6 +759,8 @@ multi_t2_fnd:
> > >
> > >         kfree(server->hostname);
> > >         task_to_wake = xchg(&server->tsk, NULL);
> > > +       server->running = 0;
> > > +       msleep(20);
> > >         kfree(server);
> > >
> > >         length = atomic_dec_return(&tcpSesAllocCount);
> > > @@ -1408,6 +1411,9 @@ cifs_put_tcp_session(struct TCP_Server_Info
> > > *server) task = xchg(&server->tsk, NULL);
> > >         if (task)
> > >                 force_sig(SIGKILL, task);
> > > +
> > > +       while(server->running == 1)
> > > +               msleep(10);
> > >  }
> > >
> > >  static struct cifsSesInfo *
> >
> > On what kernel version are you reproducing this? Does it have this patch?
> >
> > commit 0468a2cf914e79442b8309ce62e3f861599d8cab
> > Author: Jeff Layton <jlayton@redhat.com>
> > Date:   Mon Dec 1 07:09:35 2008 -0500
> >
> >     cifs: take module reference when starting cifsd
> >
> >     cifsd can outlive the last cifs mount. We need to hold a module
> >     reference until it exits to prevent someone from unplugging
> >     the module until we're ready.
> >
> >     Signed-off-by: Jeff Layton <jlayton@redhat.com>
> >     Signed-off-by: Steve French <sfrench@us.ibm.com>
> I have 2.6.27 version of kernel and it doesn't contain this code. I added it 
> and tested.

(re-cc'ing linux-cifs-client)

Ok, it's much more helpful to post patches that are based on current
upstream code. For cifs, the ideal is to base (and test) your patches
on Steve F's tree.

> Result: Umounting share finish and when I try to do "rmmod cifs" it says that 
> module is in use. I have to wait some time to do it successfully.
> 

That's what I'd expect. The module is still in use at that time,
regardless of whether any mounts are still active.

> My patch provides another behaviour: when umounting share finishes I can remove 
> module at once.  
> 

Your patch simply adds an arbitrary delay in unmounting. There's no
guarantee that 10ms is enough time for all of the shutdown processing
in cifsd to occur. It also adds a 20ms delay for the thread itself to
come down. What's that all about?

Finally, it's also racy:

> >         if (task)
> >                 force_sig(SIGKILL, task);
> > +
> > +       while(server->running == 1)
> > +               msleep(10);

You have no guarantee that "server" is still a valid pointer when you
go to dereference it. In most cases it probably will be, but hard to
hit races are *worse* since they're harder to track down.

We have to *know* that this pointer will be valid here before you do
something like this. Rather than adding arbitrary delays like this, a
better scheme would use a completion variable to indicate when the
thread is down.

Finally, I'm not convinced of the need for this at all. Why should we
guarantee that the module is able to be unplugged the instant that the
last mount vanishes? I'd much rather my umounts return quickly rather
than have them wait for the thread to come down.
diff mbox

Patch

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1ae6314..18ba45a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -176,6 +176,7 @@  struct TCP_Server_Info {
        struct mac_key mac_signing_key;
        char ntlmv2_hash[16];
        unsigned long lstrp; /* when we got last response from this 
server */
+       int running;
 };

 /*
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index f254235..39daaf2 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -347,6 +347,7 @@  cifs_demultiplex_thread(struct TCP_Server_Info *server)
        bool isMultiRsp;
        int reconnect;

+       server->running = 1;
        current->flags |= PF_MEMALLOC;
        cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));

@@ -758,6 +759,8 @@  multi_t2_fnd:

        kfree(server->hostname);
        task_to_wake = xchg(&server->tsk, NULL);
+       server->running = 0;
+       msleep(20);
        kfree(server);

        length = atomic_dec_return(&tcpSesAllocCount);
@@ -1408,6 +1411,9 @@  cifs_put_tcp_session(struct TCP_Server_Info *server)
        task = xchg(&server->tsk, NULL);
        if (task)
                force_sig(SIGKILL, task);
+
+       while(server->running == 1)
+               msleep(10);
 }

 static struct cifsSesInfo *