diff mbox series

[v2,07/29] lustre: obd: collect all resource releasing for obj_type.

Message ID 1558356671-29599-8-git-send-email-jsimmons@infradead.org (mailing list archive)
State New, archived
Headers show
Series More lustre patches | expand

Commit Message

James Simmons May 20, 2019, 12:50 p.m. UTC
From: NeilBrown <neilb@suse.com>

Now that obj_type is managed as a kobject, move all
the freeing and deregistering into class_sysfs_release().
Only leave type->typ_lu handling since unloading obdecho
can trigger an assert.

lu_context_key_degister()) ASSERTION( atomic_read(&key->lct_used) >= 1 ) failed:
lu_context_key_degister()) LBUG
kernel: Pid: 10642, comm: rmmod
Call Trace:
[<ffffffffc096e7cc>] libcfs_call_trace+0x8c/0xc0 [libcfs]
[<ffffffffc096e87c>] lbug_with_loc+0x4c/0xa0 [libcfs]
[<ffffffffc0f9761c>] lu_context_key_degister+0x14c/0x160 [obdclass]
[<ffffffffc0f977d2>] lu_context_key_degister_many+0x72/0xb0 [obdclass]
[<ffffffffc13e7130>] echo_type_fini+0x20/0x30 [obdecho]
[<ffffffffc0f9618b>] lu_device_type_fini+0x1b/0x20 [obdclass]
[<ffffffffc0f67fce>] class_sysfs_release+0x3e/0x6b0 [obdclass]
[<ffffffffb85782a1>] kobject_release+0x81/0x1b0
[<ffffffffb8578138>] kobject_put+0x28/0x60
[<ffffffffc0f6940c>] class_unregister_type+0x23c/0x550 [obdclass]
[<ffffffffc13f9636>] obdecho_exit+0x10/0x9da [obdecho]

Reviewed-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: NeilBrown <neilb@suse.com>
---
 fs/lustre/obdclass/genops.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

Comments

NeilBrown May 22, 2019, 6:49 a.m. UTC | #1
On Mon, May 20 2019, James Simmons wrote:

> From: NeilBrown <neilb@suse.com>
>
> Now that obj_type is managed as a kobject, move all
> the freeing and deregistering into class_sysfs_release().
> Only leave type->typ_lu handling since unloading obdecho
> can trigger an assert.
>
> lu_context_key_degister()) ASSERTION( atomic_read(&key->lct_used) >= 1 ) failed:
> lu_context_key_degister()) LBUG
> kernel: Pid: 10642, comm: rmmod
> Call Trace:
> [<ffffffffc096e7cc>] libcfs_call_trace+0x8c/0xc0 [libcfs]
> [<ffffffffc096e87c>] lbug_with_loc+0x4c/0xa0 [libcfs]
> [<ffffffffc0f9761c>] lu_context_key_degister+0x14c/0x160 [obdclass]
> [<ffffffffc0f977d2>] lu_context_key_degister_many+0x72/0xb0 [obdclass]
> [<ffffffffc13e7130>] echo_type_fini+0x20/0x30 [obdecho]
> [<ffffffffc0f9618b>] lu_device_type_fini+0x1b/0x20 [obdclass]
> [<ffffffffc0f67fce>] class_sysfs_release+0x3e/0x6b0 [obdclass]
> [<ffffffffb85782a1>] kobject_release+0x81/0x1b0
> [<ffffffffb8578138>] kobject_put+0x28/0x60
> [<ffffffffc0f6940c>] class_unregister_type+0x23c/0x550 [obdclass]
> [<ffffffffc13f9636>] obdecho_exit+0x10/0x9da [obdecho]

I cannot reproduce this, and the change you suggest to fix is seems only
tangentially related to the symptom.

So I'm going to keep the lu_device_type_fini call in
class_sysfs_release().

If it happens again, I'd love to hear of it - even more so if you can
reproduce.

The most likely cause of the assertion failure is that  echo_type_fini
gets called twice.

Prior to
  Commit ef84c0736421 ("staging: lustre: use wait_event_var() in lu_context_key_degister()")

this would not have been fatal.  Now it is.  Maybe that is the cause of
this failed assertion.

Thanks,
NeilBrown


>
> Reviewed-by: James Simmons <jsimmons@infradead.org>
> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
>  fs/lustre/obdclass/genops.c | 21 ++++++++++-----------
>  1 file changed, 10 insertions(+), 11 deletions(-)
>
> diff --git a/fs/lustre/obdclass/genops.c b/fs/lustre/obdclass/genops.c
> index ccd8a42..2a5ec93 100644
> --- a/fs/lustre/obdclass/genops.c
> +++ b/fs/lustre/obdclass/genops.c
> @@ -138,6 +138,15 @@ static void class_sysfs_release(struct kobject *kobj)
>  {
>  	struct obd_type *type = container_of(kobj, struct obd_type, typ_kobj);
>  
> +	debugfs_remove_recursive(type->typ_debugfs_entry);
> +
> +	spin_lock(&obd_types_lock);
> +	list_del(&type->typ_chain);
> +	spin_unlock(&obd_types_lock);
> +
> +	kfree(type->typ_name);
> +	kfree(type->typ_md_ops);
> +	kfree(type->typ_dt_ops);
>  	kfree(type);
>  }
>  
> @@ -167,6 +176,7 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
>  	if (!type)
>  		return -ENOMEM;
>  
> +	INIT_LIST_HEAD(&type->typ_chain);
>  	type->typ_kobj.kset = lustre_kset;
>  	rc = kobject_init_and_add(&type->typ_kobj, &class_ktype,
>  				  &lustre_kset->kobj, "%s", name);
> @@ -205,9 +215,6 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
>  	return 0;
>  
>  failed:
> -	kfree(type->typ_name);
> -	kfree(type->typ_md_ops);
> -	kfree(type->typ_dt_ops);
>  	kobject_put(&type->typ_kobj);
>  
>  	return rc;
> @@ -232,17 +239,9 @@ int class_unregister_type(const char *name)
>  		return -EBUSY;
>  	}
>  
> -	debugfs_remove_recursive(type->typ_debugfs_entry);
> -
>  	if (type->typ_lu)
>  		lu_device_type_fini(type->typ_lu);
>  
> -	spin_lock(&obd_types_lock);
> -	list_del(&type->typ_chain);
> -	spin_unlock(&obd_types_lock);
> -	kfree(type->typ_name);
> -	kfree(type->typ_dt_ops);
> -	kfree(type->typ_md_ops);
>  	kobject_put(&type->typ_kobj);
>  
>  	return 0;
> -- 
> 1.8.3.1
James Simmons May 22, 2019, 6:51 p.m. UTC | #2
>>  int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
>> @@ -181,10 +163,17 @@ int class_register_type(struct obd_ops *dt_ops, 
>struct md_ops *md_ops,
>>  		return -EEXIST;
>>  	}
>>  
>> -	rc = -ENOMEM;
>>  	type = kzalloc(sizeof(*type), GFP_NOFS);
>>  	if (!type)
>> -		return rc;
>> +		return -ENOMEM;
>> +
>> +	type->typ_kobj.kset = lustre_kset;
>> +	rc = kobject_init_and_add(&type->typ_kobj, &class_ktype,
>> +				  &lustre_kset->kobj, "%s", name);
>
>I don't know that I would actually cause a problem, but I don't like
>"add"ing and object (above) before fully initializing it (below).  So
>I've kept the split from my version where kobject_init() happens early
>and kobject_add() happens later.
>I've included the other changes that you made.
>
>Thanks,
>NeilBrown

The reason I did it that way was to handle the server case down the road.
So for the case when both client and server are on the same node, and yes
people do such setups for testing this is important.

Consider the case we have both the lov and lod layer on a single node. 
Both layers attempt to create "lov" obd_type. When the lov module loads 
first then a complete obd_type is created. Once lod loads then it just 
uses real lov obd_type and creates the needed symlinks. You end up with

ls -al /sys/fs/lustre/lov/
total 0
drwxr-xr-x  2 root root 0 May 22 14:27 .
drwxr-xr-x 14 root root 0 May 22 14:27 ..
lrwxrwxrwx  1 root root 0 May 22 14:27 lustre-MDT0000-mdtlov -> 
../lod/lustre-MDT0000-mdtlov
lrwxrwxrwx  1 root root 0 May 22 14:27 lustre-MDT0002-mdtlov -> 
../lod/lustre-MDT0002-mdtlov

Plus the real lov obd devices.

Now if lod loads first then the lod module creates a "lov" obd_type
using class_create_symlink() but is not fully initialized nor does
it need to be. If the client lov module is present and it loads then
it takes the "lov" obd_type create by the lod and finishes initializing
it.

Looking at the final code and added in server case:

        type = class_search_type(name);
        if (type) {
                kobject_put(&type->typ_kobj);
		if (strcmp(name, LUSTRE_LOV_NAME) == 0 ||
                    strcmp(name, LUSTRE_OSC_NAME) == 0)
                        goto dir_exist;
		CDEBUG(D_IOCTL, "Type %s already registered\n", name);
                return -EEXIST;
        }

        type = kzalloc(sizeof(*type), GFP_NOFS);
        if (!type)
                return rc;

        type->typ_kobj.kset = lustre_kset;
        kobject_init(&type->typ_kobj, &class_ktype);

        type->typ_dt_ops = dt_ops;  /* lov obd_type is never set to the 
				     * correct values if lod created it.
				     */
        type->typ_md_ops = md_ops;

        rc = kobject_add(&type->typ_kobj, &lustre_kset->kobj, "%s", name);
        if (rc)
                goto failed;

        type->typ_debugfs_entry = debugfs_create_dir(type->typ_name,
                                                     debugfs_lustre_root);
dir_exit:

Now if this needed for the later module handling patches I guess we could
do:

	if (strcmp(name, LUSTRE_LOV_NAME) == 0 ||
            strcmp(name, LUSTRE_OSC_NAME) == 0) {
		type->typ_dt_ops = dt_ops;
		type->typ_md_ops = md_ops;
		goto dir_exist;
	}

Is that the case?
Andreas Dilger May 22, 2019, 10:07 p.m. UTC | #3
On May 22, 2019, at 12:51, James Simmons <jsimmons@infradead.org> wrote:
> 
> 
>>> int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
>>> @@ -181,10 +163,17 @@ int class_register_type(struct obd_ops *dt_ops, 
>> struct md_ops *md_ops,
>>> 		return -EEXIST;
>>> 	}
>>> 
>>> -	rc = -ENOMEM;
>>> 	type = kzalloc(sizeof(*type), GFP_NOFS);
>>> 	if (!type)
>>> -		return rc;
>>> +		return -ENOMEM;
>>> +
>>> +	type->typ_kobj.kset = lustre_kset;
>>> +	rc = kobject_init_and_add(&type->typ_kobj, &class_ktype,
>>> +				  &lustre_kset->kobj, "%s", name);
>> 
>> I don't know that I would actually cause a problem, but I don't like
>> "add"ing and object (above) before fully initializing it (below).  So
>> I've kept the split from my version where kobject_init() happens early
>> and kobject_add() happens later.
>> I've included the other changes that you made.
>> 
>> Thanks,
>> NeilBrown
> 
> The reason I did it that way was to handle the server case down the road.
> So for the case when both client and server are on the same node, and yes
> people do such setups for testing this is important.
> 
> Consider the case we have both the lov and lod layer on a single node. 
> Both layers attempt to create "lov" obd_type. When the lov module loads 
> first then a complete obd_type is created. Once lod loads then it just 
> uses real lov obd_type and creates the needed symlinks.

We've had the "lov->lod" symlinks on servers since Lustre 2.4 or so (when
osd-zfs was first added).  We could just remove this compatibility, so
long as the test scripts were updated to always use "lod" on the server
and "lov" on the client (previously they shared the same "lov" code module
so the path was the same).  We don't need test script interop going back
further than that, so this should be OK.

Cheers, Andreas

> You end up with
> 
> ls -al /sys/fs/lustre/lov/
> total 0
> drwxr-xr-x  2 root root 0 May 22 14:27 .
> drwxr-xr-x 14 root root 0 May 22 14:27 ..
> lrwxrwxrwx  1 root root 0 May 22 14:27 lustre-MDT0000-mdtlov -> 
> ../lod/lustre-MDT0000-mdtlov
> lrwxrwxrwx  1 root root 0 May 22 14:27 lustre-MDT0002-mdtlov -> 
> ../lod/lustre-MDT0002-mdtlov
> 
> Plus the real lov obd devices.
> 
> Now if lod loads first then the lod module creates a "lov" obd_type
> using class_create_symlink() but is not fully initialized nor does
> it need to be. If the client lov module is present and it loads then
> it takes the "lov" obd_type create by the lod and finishes initializing
> it.
> 
> Looking at the final code and added in server case:
> 
>        type = class_search_type(name);
>        if (type) {
>                kobject_put(&type->typ_kobj);
> 		if (strcmp(name, LUSTRE_LOV_NAME) == 0 ||
>                    strcmp(name, LUSTRE_OSC_NAME) == 0)
>                        goto dir_exist;
> 		CDEBUG(D_IOCTL, "Type %s already registered\n", name);
>                return -EEXIST;
>        }
> 
>        type = kzalloc(sizeof(*type), GFP_NOFS);
>        if (!type)
>                return rc;
> 
>        type->typ_kobj.kset = lustre_kset;
>        kobject_init(&type->typ_kobj, &class_ktype);
> 
>        type->typ_dt_ops = dt_ops;  /* lov obd_type is never set to the 
> 				     * correct values if lod created it.
> 				     */
>        type->typ_md_ops = md_ops;
> 
>        rc = kobject_add(&type->typ_kobj, &lustre_kset->kobj, "%s", name);
>        if (rc)
>                goto failed;
> 
>        type->typ_debugfs_entry = debugfs_create_dir(type->typ_name,
>                                                     debugfs_lustre_root);
> dir_exit:
> 
> Now if this needed for the later module handling patches I guess we could
> do:
> 
> 	if (strcmp(name, LUSTRE_LOV_NAME) == 0 ||
>            strcmp(name, LUSTRE_OSC_NAME) == 0) {
> 		type->typ_dt_ops = dt_ops;
> 		type->typ_md_ops = md_ops;
> 		goto dir_exist;
> 	}
> 
> Is that the case?
> 
> 

Cheers, Andreas
--
Andreas Dilger
Principal Lustre Architect
Whamcloud
James Simmons June 1, 2019, 12:38 a.m. UTC | #4
> >>> int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
> >>> @@ -181,10 +163,17 @@ int class_register_type(struct obd_ops *dt_ops, 
> >> struct md_ops *md_ops,
> >>> 		return -EEXIST;
> >>> 	}
> >>> 
> >>> -	rc = -ENOMEM;
> >>> 	type = kzalloc(sizeof(*type), GFP_NOFS);
> >>> 	if (!type)
> >>> -		return rc;
> >>> +		return -ENOMEM;
> >>> +
> >>> +	type->typ_kobj.kset = lustre_kset;
> >>> +	rc = kobject_init_and_add(&type->typ_kobj, &class_ktype,
> >>> +				  &lustre_kset->kobj, "%s", name);
> >> 
> >> I don't know that I would actually cause a problem, but I don't like
> >> "add"ing and object (above) before fully initializing it (below).  So
> >> I've kept the split from my version where kobject_init() happens early
> >> and kobject_add() happens later.
> >> I've included the other changes that you made.
> >> 
> >> Thanks,
> >> NeilBrown
> > 
> > The reason I did it that way was to handle the server case down the road.
> > So for the case when both client and server are on the same node, and yes
> > people do such setups for testing this is important.
> > 
> > Consider the case we have both the lov and lod layer on a single node. 
> > Both layers attempt to create "lov" obd_type. When the lov module loads 
> > first then a complete obd_type is created. Once lod loads then it just 
> > uses real lov obd_type and creates the needed symlinks.
> 
> We've had the "lov->lod" symlinks on servers since Lustre 2.4 or so (when
> osd-zfs was first added).  We could just remove this compatibility, so
> long as the test scripts were updated to always use "lod" on the server
> and "lov" on the client (previously they shared the same "lov" code module
> so the path was the same).  We don't need test script interop going back
> further than that, so this should be OK.
> 
> Cheers, Andreas

I ripped off the band aid and boy did it bleed. I going to have to work
out the test suite changes. Well will need to back port the test suite
changes to 2.12 for interop testing.
 
> > You end up with
> > 
> > ls -al /sys/fs/lustre/lov/
> > total 0
> > drwxr-xr-x  2 root root 0 May 22 14:27 .
> > drwxr-xr-x 14 root root 0 May 22 14:27 ..
> > lrwxrwxrwx  1 root root 0 May 22 14:27 lustre-MDT0000-mdtlov -> 
> > ../lod/lustre-MDT0000-mdtlov
> > lrwxrwxrwx  1 root root 0 May 22 14:27 lustre-MDT0002-mdtlov -> 
> > ../lod/lustre-MDT0002-mdtlov
> > 
> > Plus the real lov obd devices.
> > 
> > Now if lod loads first then the lod module creates a "lov" obd_type
> > using class_create_symlink() but is not fully initialized nor does
> > it need to be. If the client lov module is present and it loads then
> > it takes the "lov" obd_type create by the lod and finishes initializing
> > it.
> > 
> > Looking at the final code and added in server case:
> > 
> >        type = class_search_type(name);
> >        if (type) {
> >                kobject_put(&type->typ_kobj);
> > 		if (strcmp(name, LUSTRE_LOV_NAME) == 0 ||
> >                    strcmp(name, LUSTRE_OSC_NAME) == 0)
> >                        goto dir_exist;
> > 		CDEBUG(D_IOCTL, "Type %s already registered\n", name);
> >                return -EEXIST;
> >        }
> > 
> >        type = kzalloc(sizeof(*type), GFP_NOFS);
> >        if (!type)
> >                return rc;
> > 
> >        type->typ_kobj.kset = lustre_kset;
> >        kobject_init(&type->typ_kobj, &class_ktype);
> > 
> >        type->typ_dt_ops = dt_ops;  /* lov obd_type is never set to the 
> > 				     * correct values if lod created it.
> > 				     */
> >        type->typ_md_ops = md_ops;
> > 
> >        rc = kobject_add(&type->typ_kobj, &lustre_kset->kobj, "%s", name);
> >        if (rc)
> >                goto failed;
> > 
> >        type->typ_debugfs_entry = debugfs_create_dir(type->typ_name,
> >                                                     debugfs_lustre_root);
> > dir_exit:
> > 
> > Now if this needed for the later module handling patches I guess we could
> > do:
> > 
> > 	if (strcmp(name, LUSTRE_LOV_NAME) == 0 ||
> >            strcmp(name, LUSTRE_OSC_NAME) == 0) {
> > 		type->typ_dt_ops = dt_ops;
> > 		type->typ_md_ops = md_ops;
> > 		goto dir_exist;
> > 	}
> > 
> > Is that the case?
> > 
> > 
> 
> Cheers, Andreas
> --
> Andreas Dilger
> Principal Lustre Architect
> Whamcloud
> 
>
diff mbox series

Patch

diff --git a/fs/lustre/obdclass/genops.c b/fs/lustre/obdclass/genops.c
index ccd8a42..2a5ec93 100644
--- a/fs/lustre/obdclass/genops.c
+++ b/fs/lustre/obdclass/genops.c
@@ -138,6 +138,15 @@  static void class_sysfs_release(struct kobject *kobj)
 {
 	struct obd_type *type = container_of(kobj, struct obd_type, typ_kobj);
 
+	debugfs_remove_recursive(type->typ_debugfs_entry);
+
+	spin_lock(&obd_types_lock);
+	list_del(&type->typ_chain);
+	spin_unlock(&obd_types_lock);
+
+	kfree(type->typ_name);
+	kfree(type->typ_md_ops);
+	kfree(type->typ_dt_ops);
 	kfree(type);
 }
 
@@ -167,6 +176,7 @@  int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
 	if (!type)
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&type->typ_chain);
 	type->typ_kobj.kset = lustre_kset;
 	rc = kobject_init_and_add(&type->typ_kobj, &class_ktype,
 				  &lustre_kset->kobj, "%s", name);
@@ -205,9 +215,6 @@  int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
 	return 0;
 
 failed:
-	kfree(type->typ_name);
-	kfree(type->typ_md_ops);
-	kfree(type->typ_dt_ops);
 	kobject_put(&type->typ_kobj);
 
 	return rc;
@@ -232,17 +239,9 @@  int class_unregister_type(const char *name)
 		return -EBUSY;
 	}
 
-	debugfs_remove_recursive(type->typ_debugfs_entry);
-
 	if (type->typ_lu)
 		lu_device_type_fini(type->typ_lu);
 
-	spin_lock(&obd_types_lock);
-	list_del(&type->typ_chain);
-	spin_unlock(&obd_types_lock);
-	kfree(type->typ_name);
-	kfree(type->typ_dt_ops);
-	kfree(type->typ_md_ops);
 	kobject_put(&type->typ_kobj);
 
 	return 0;