diff mbox series

[v2] lustre: mdc: fix possible deadlock in chlg_open()

Message ID 875zxh3elu.fsf@notabene.neil.brown.name (mailing list archive)
State New, archived
Headers show
Series [v2] lustre: mdc: fix possible deadlock in chlg_open() | expand

Commit Message

NeilBrown Oct. 31, 2018, 11:01 p.m. UTC
Lockdep reports a possible deadlock between chlg_open() and
mdc_changelog_cdev_init()

mdc_changelog_cdev_init() takes chlg_registered_dev_lock and then
calls misc_register() which takes misc_mtx.
chlg_open() is called while misc_mtx is held, and tries to take
chlg_registered_dev_lock.
If these two functions race, a deadlock can occur as each thread will
hold one of the locks while trying to take the other.

chlg_open() does not need to take a lock.  It only uses the
lock to stablize a list while looking for the matching
chlg_registered_dev, and this can be found directly by examining
file->private_data.

So remove chlg_obd_get(), and use file->private_data to find the
obd_device.
Also ensure the device is fully initialized before calling
misc_register().  This means setting up some list linkage before the
call, and tearing it down if there is an error.

Signed-off-by: NeilBrown <neilb@suse.com>
---

This is the revised version with the problem identified by Quentin
fixed.

 drivers/staging/lustre/lustre/mdc/mdc_changelog.c | 46 +++++++----------------
 1 file changed, 14 insertions(+), 32 deletions(-)

Comments

James Simmons Nov. 4, 2018, 9:34 p.m. UTC | #1
> Lockdep reports a possible deadlock between chlg_open() and
> mdc_changelog_cdev_init()
> 
> mdc_changelog_cdev_init() takes chlg_registered_dev_lock and then
> calls misc_register() which takes misc_mtx.
> chlg_open() is called while misc_mtx is held, and tries to take
> chlg_registered_dev_lock.
> If these two functions race, a deadlock can occur as each thread will
> hold one of the locks while trying to take the other.
> 
> chlg_open() does not need to take a lock.  It only uses the
> lock to stablize a list while looking for the matching
> chlg_registered_dev, and this can be found directly by examining
> file->private_data.
> 
> So remove chlg_obd_get(), and use file->private_data to find the
> obd_device.
> Also ensure the device is fully initialized before calling
> misc_register().  This means setting up some list linkage before the
> call, and tearing it down if there is an error.

I have been testing this but I'm no HSM expert. I pushed this patch
to OpenSFS branch as well.

https://jira.whamcloud.com/browse/LU-11617
https://review.whamcloud.com/#/c/33572/

Reviewed-by: James Simmons <jsimmons@infradead.org>

> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
> 
> This is the revised version with the problem identified by Quentin
> fixed.
> 
>  drivers/staging/lustre/lustre/mdc/mdc_changelog.c | 46 +++++++----------------
>  1 file changed, 14 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/staging/lustre/lustre/mdc/mdc_changelog.c b/drivers/staging/lustre/lustre/mdc/mdc_changelog.c
> index d83507cbf95c..af29ea73c48a 100644
> --- a/drivers/staging/lustre/lustre/mdc/mdc_changelog.c
> +++ b/drivers/staging/lustre/lustre/mdc/mdc_changelog.c
> @@ -444,31 +444,6 @@ static ssize_t chlg_write(struct file *file, const char __user *buff,
>  	return rc < 0 ? rc : count;
>  }
>  
> -/**
> - * Find the OBD device associated to a changelog character device.
> - * @param[in]  cdev  character device instance descriptor
> - * @return corresponding OBD device or NULL if none was found.
> - */
> -static struct obd_device *chlg_obd_get(dev_t cdev)
> -{
> -	int minor = MINOR(cdev);
> -	struct obd_device *obd = NULL;
> -	struct chlg_registered_dev *curr;
> -
> -	mutex_lock(&chlg_registered_dev_lock);
> -	list_for_each_entry(curr, &chlg_registered_devices, ced_link) {
> -		if (curr->ced_misc.minor == minor) {
> -			/* take the first available OBD device attached */
> -			obd = list_first_entry(&curr->ced_obds,
> -					       struct obd_device,
> -					       u.cli.cl_chg_dev_linkage);
> -			break;
> -		}
> -	}
> -	mutex_unlock(&chlg_registered_dev_lock);
> -	return obd;
> -}
> -
>  /**
>   * Open handler, initialize internal CRS state and spawn prefetch thread if
>   * needed.
> @@ -479,12 +454,16 @@ static struct obd_device *chlg_obd_get(dev_t cdev)
>  static int chlg_open(struct inode *inode, struct file *file)
>  {
>  	struct chlg_reader_state *crs;
> -	struct obd_device *obd = chlg_obd_get(inode->i_rdev);
> +	struct miscdevice *misc = file->private_data;
> +	struct chlg_registered_dev *dev;
> +	struct obd_device *obd;
>  	struct task_struct *task;
>  	int rc;
>  
> -	if (!obd)
> -		return -ENODEV;
> +	dev = container_of(misc, struct chlg_registered_dev, ced_misc);
> +	obd = list_first_entry(&dev->ced_obds,
> +			       struct obd_device,
> +			       u.cli.cl_chg_dev_linkage);
>  
>  	crs = kzalloc(sizeof(*crs), GFP_KERNEL);
>  	if (!crs)
> @@ -669,13 +648,16 @@ int mdc_changelog_cdev_init(struct obd_device *obd)
>  		goto out_unlock;
>  	}
>  
> +	list_add_tail(&obd->u.cli.cl_chg_dev_linkage, &entry->ced_obds);
> +	list_add_tail(&entry->ced_link, &chlg_registered_devices);
> +
>  	/* Register new character device */
>  	rc = misc_register(&entry->ced_misc);
> -	if (rc != 0)
> +	if (rc != 0) {
> +		list_del_init(&obd->u.cli.cl_chg_dev_linkage);
> +		list_del(&entry->ced_link);
>  		goto out_unlock;
> -
> -	list_add_tail(&obd->u.cli.cl_chg_dev_linkage, &entry->ced_obds);
> -	list_add_tail(&entry->ced_link, &chlg_registered_devices);
> +	}
>  
>  	entry = NULL;	/* prevent it from being freed below */
>  
> -- 
> 2.14.0.rc0.dirty
> 
>
BOUGET Quentin Nov. 5, 2018, 1:37 p.m. UTC | #2
Le 04/11/2018 à 22:34, James Simmons a écrit :
>> Lockdep reports a possible deadlock between chlg_open() and
>> mdc_changelog_cdev_init()
>>
>> mdc_changelog_cdev_init() takes chlg_registered_dev_lock and then
>> calls misc_register() which takes misc_mtx.
>> chlg_open() is called while misc_mtx is held, and tries to take
>> chlg_registered_dev_lock.
>> If these two functions race, a deadlock can occur as each thread will
>> hold one of the locks while trying to take the other.
>>
>> chlg_open() does not need to take a lock.  It only uses the
>> lock to stablize a list while looking for the matching
>> chlg_registered_dev, and this can be found directly by examining
>> file->private_data.
>>
>> So remove chlg_obd_get(), and use file->private_data to find the
>> obd_device.
>> Also ensure the device is fully initialized before calling
>> misc_register().  This means setting up some list linkage before the
>> call, and tearing it down if there is an error.
> I have been testing this but I'm no HSM expert. I pushed this patch
> to OpenSFS branch as well.
>
> https://jira.whamcloud.com/browse/LU-11617
> https://review.whamcloud.com/#/c/33572/
>
> Reviewed-by: James Simmons <jsimmons@infradead.org>

Reviewed-by: Quentin Bouget <quentin.bouget@cea.fr>

>
>> Signed-off-by: NeilBrown <neilb@suse.com>
>> ---
>>
>> This is the revised version with the problem identified by Quentin
>> fixed.
>>
>>   drivers/staging/lustre/lustre/mdc/mdc_changelog.c | 46 +++++++----------------
>>   1 file changed, 14 insertions(+), 32 deletions(-)
>>
>> diff --git a/drivers/staging/lustre/lustre/mdc/mdc_changelog.c b/drivers/staging/lustre/lustre/mdc/mdc_changelog.c
>> index d83507cbf95c..af29ea73c48a 100644
>> --- a/drivers/staging/lustre/lustre/mdc/mdc_changelog.c
>> +++ b/drivers/staging/lustre/lustre/mdc/mdc_changelog.c
>> @@ -444,31 +444,6 @@ static ssize_t chlg_write(struct file *file, const char __user *buff,
>>   	return rc < 0 ? rc : count;
>>   }
>>   
>> -/**
>> - * Find the OBD device associated to a changelog character device.
>> - * @param[in]  cdev  character device instance descriptor
>> - * @return corresponding OBD device or NULL if none was found.
>> - */
>> -static struct obd_device *chlg_obd_get(dev_t cdev)
>> -{
>> -	int minor = MINOR(cdev);
>> -	struct obd_device *obd = NULL;
>> -	struct chlg_registered_dev *curr;
>> -
>> -	mutex_lock(&chlg_registered_dev_lock);
>> -	list_for_each_entry(curr, &chlg_registered_devices, ced_link) {
>> -		if (curr->ced_misc.minor == minor) {
>> -			/* take the first available OBD device attached */
>> -			obd = list_first_entry(&curr->ced_obds,
>> -					       struct obd_device,
>> -					       u.cli.cl_chg_dev_linkage);
>> -			break;
>> -		}
>> -	}
>> -	mutex_unlock(&chlg_registered_dev_lock);
>> -	return obd;
>> -}
>> -
>>   /**
>>    * Open handler, initialize internal CRS state and spawn prefetch thread if
>>    * needed.
>> @@ -479,12 +454,16 @@ static struct obd_device *chlg_obd_get(dev_t cdev)
>>   static int chlg_open(struct inode *inode, struct file *file)
>>   {
>>   	struct chlg_reader_state *crs;
>> -	struct obd_device *obd = chlg_obd_get(inode->i_rdev);
>> +	struct miscdevice *misc = file->private_data;
>> +	struct chlg_registered_dev *dev;
>> +	struct obd_device *obd;
>>   	struct task_struct *task;
>>   	int rc;
>>   
>> -	if (!obd)
>> -		return -ENODEV;
>> +	dev = container_of(misc, struct chlg_registered_dev, ced_misc);
>> +	obd = list_first_entry(&dev->ced_obds,
>> +			       struct obd_device,
>> +			       u.cli.cl_chg_dev_linkage);
>>   
>>   	crs = kzalloc(sizeof(*crs), GFP_KERNEL);
>>   	if (!crs)
>> @@ -669,13 +648,16 @@ int mdc_changelog_cdev_init(struct obd_device *obd)
>>   		goto out_unlock;
>>   	}
>>   
>> +	list_add_tail(&obd->u.cli.cl_chg_dev_linkage, &entry->ced_obds);
>> +	list_add_tail(&entry->ced_link, &chlg_registered_devices);
>> +
>>   	/* Register new character device */
>>   	rc = misc_register(&entry->ced_misc);
>> -	if (rc != 0)
>> +	if (rc != 0) {
>> +		list_del_init(&obd->u.cli.cl_chg_dev_linkage);
>> +		list_del(&entry->ced_link);
>>   		goto out_unlock;
>> -
>> -	list_add_tail(&obd->u.cli.cl_chg_dev_linkage, &entry->ced_obds);
>> -	list_add_tail(&entry->ced_link, &chlg_registered_devices);
>> +	}
>>   
>>   	entry = NULL;	/* prevent it from being freed below */
>>   
>> -- 
>> 2.14.0.rc0.dirty
>>
>>
NeilBrown Nov. 6, 2018, 3:11 a.m. UTC | #3
On Mon, Nov 05 2018, quentin.bouget@cea.fr wrote:

> Le 04/11/2018 à 22:34, James Simmons a écrit :
>>> Lockdep reports a possible deadlock between chlg_open() and
>>> mdc_changelog_cdev_init()
>>>
>>> mdc_changelog_cdev_init() takes chlg_registered_dev_lock and then
>>> calls misc_register() which takes misc_mtx.
>>> chlg_open() is called while misc_mtx is held, and tries to take
>>> chlg_registered_dev_lock.
>>> If these two functions race, a deadlock can occur as each thread will
>>> hold one of the locks while trying to take the other.
>>>
>>> chlg_open() does not need to take a lock.  It only uses the
>>> lock to stablize a list while looking for the matching
>>> chlg_registered_dev, and this can be found directly by examining
>>> file->private_data.
>>>
>>> So remove chlg_obd_get(), and use file->private_data to find the
>>> obd_device.
>>> Also ensure the device is fully initialized before calling
>>> misc_register().  This means setting up some list linkage before the
>>> call, and tearing it down if there is an error.
>> I have been testing this but I'm no HSM expert. I pushed this patch
>> to OpenSFS branch as well.
>>
>> https://jira.whamcloud.com/browse/LU-11617
>> https://review.whamcloud.com/#/c/33572/
>>
>> Reviewed-by: James Simmons <jsimmons@infradead.org>
>
> Reviewed-by: Quentin Bouget <quentin.bouget@cea.fr>
>

Thanks to you both for the review.

NeilBrown
BOUGET Quentin Nov. 6, 2018, 10:41 a.m. UTC | #4
Le 06/11/2018 à 04:11, NeilBrown a écrit :
> On Mon, Nov 05 2018, quentin.bouget@cea.fr wrote:
>
>> Le 04/11/2018 à 22:34, James Simmons a écrit :
>>>> Lockdep reports a possible deadlock between chlg_open() and
>>>> mdc_changelog_cdev_init()
>>>>
>>>> mdc_changelog_cdev_init() takes chlg_registered_dev_lock and then
>>>> calls misc_register() which takes misc_mtx.
>>>> chlg_open() is called while misc_mtx is held, and tries to take
>>>> chlg_registered_dev_lock.
>>>> If these two functions race, a deadlock can occur as each thread will
>>>> hold one of the locks while trying to take the other.
>>>>
>>>> chlg_open() does not need to take a lock.  It only uses the
>>>> lock to stablize a list while looking for the matching
>>>> chlg_registered_dev, and this can be found directly by examining
>>>> file->private_data.
>>>>
>>>> So remove chlg_obd_get(), and use file->private_data to find the
>>>> obd_device.
>>>> Also ensure the device is fully initialized before calling
>>>> misc_register().  This means setting up some list linkage before the
>>>> call, and tearing it down if there is an error.
>>> I have been testing this but I'm no HSM expert. I pushed this patch
>>> to OpenSFS branch as well.
>>>
>>> https://jira.whamcloud.com/browse/LU-11617
>>> https://review.whamcloud.com/#/c/33572/
>>>
>>> Reviewed-by: James Simmons <jsimmons@infradead.org>
>> Reviewed-by: Quentin Bouget <quentin.bouget@cea.fr>
>>
> Thanks to you both for the review.
>
> NeilBrown
>
Wait! I just realised there might be another issue!
I think there is now a race between chlg_open() and 
mdc_changelog_cdev_finish().

Wait! I just realised there might be another bigger issue!
The whole "take the first obd you can find" is broken! I opened a ticket 
<https://jira.whamcloud.com/browse/LU-11626> on whamcloud's JIRA about it.

Quentin Bouget
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <div class="moz-cite-prefix">Le 06/11/2018 à 04:11, NeilBrown a
      écrit :<br>
    </div>
    <blockquote type="cite"
      cite="mid:87wopqyk5h.fsf@notabene.neil.brown.name">
      <pre wrap="">On Mon, Nov 05 2018, <a class="moz-txt-link-abbreviated" href="mailto:quentin.bouget@cea.fr">quentin.bouget@cea.fr</a> wrote:

</pre>
      <blockquote type="cite">
        <pre wrap="">Le 04/11/2018 à 22:34, James Simmons a écrit :
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="">Lockdep reports a possible deadlock between chlg_open() and
mdc_changelog_cdev_init()

mdc_changelog_cdev_init() takes chlg_registered_dev_lock and then
calls misc_register() which takes misc_mtx.
chlg_open() is called while misc_mtx is held, and tries to take
chlg_registered_dev_lock.
If these two functions race, a deadlock can occur as each thread will
hold one of the locks while trying to take the other.

chlg_open() does not need to take a lock.  It only uses the
lock to stablize a list while looking for the matching
chlg_registered_dev, and this can be found directly by examining
file-&gt;private_data.

So remove chlg_obd_get(), and use file-&gt;private_data to find the
obd_device.
Also ensure the device is fully initialized before calling
misc_register().  This means setting up some list linkage before the
call, and tearing it down if there is an error.
</pre>
          </blockquote>
          <pre wrap="">I have been testing this but I'm no HSM expert. I pushed this patch
to OpenSFS branch as well.

<a class="moz-txt-link-freetext" href="https://jira.whamcloud.com/browse/LU-11617">https://jira.whamcloud.com/browse/LU-11617</a>
<a class="moz-txt-link-freetext" href="https://review.whamcloud.com/#/c/33572/">https://review.whamcloud.com/#/c/33572/</a>

Reviewed-by: James Simmons <a class="moz-txt-link-rfc2396E" href="mailto:jsimmons@infradead.org">&lt;jsimmons@infradead.org&gt;</a>
</pre>
        </blockquote>
        <pre wrap="">
Reviewed-by: Quentin Bouget <a class="moz-txt-link-rfc2396E" href="mailto:quentin.bouget@cea.fr">&lt;quentin.bouget@cea.fr&gt;</a>

</pre>
      </blockquote>
      <pre wrap="">
Thanks to you both for the review.

NeilBrown

</pre>
    </blockquote>
    <p>Wait! I just realised there might be another issue!<br>
      I think there is now a race between chlg_open() and
      mdc_changelog_cdev_finish().<br>
    </p>
    Wait! I just realised there might be another bigger issue!<br>
    The whole "take the first obd you can find" is broken! I opened a <a
      moz-do-not-send="true"
      href="https://jira.whamcloud.com/browse/LU-11626">ticket</a> on
    whamcloud's JIRA about it.<br>
    <p>Quentin Bouget<br>
    </p>
  </body>
</html>
NeilBrown Nov. 7, 2018, 12:33 a.m. UTC | #5
On Tue, Nov 06 2018, quentin.bouget@cea.fr wrote:

> Le 06/11/2018 à 04:11, NeilBrown a écrit :
>> On Mon, Nov 05 2018, quentin.bouget@cea.fr wrote:
>>
>>> Le 04/11/2018 à 22:34, James Simmons a écrit :
>>>>> Lockdep reports a possible deadlock between chlg_open() and
>>>>> mdc_changelog_cdev_init()
>>>>>
>>>>> mdc_changelog_cdev_init() takes chlg_registered_dev_lock and then
>>>>> calls misc_register() which takes misc_mtx.
>>>>> chlg_open() is called while misc_mtx is held, and tries to take
>>>>> chlg_registered_dev_lock.
>>>>> If these two functions race, a deadlock can occur as each thread will
>>>>> hold one of the locks while trying to take the other.
>>>>>
>>>>> chlg_open() does not need to take a lock.  It only uses the
>>>>> lock to stablize a list while looking for the matching
>>>>> chlg_registered_dev, and this can be found directly by examining
>>>>> file->private_data.
>>>>>
>>>>> So remove chlg_obd_get(), and use file->private_data to find the
>>>>> obd_device.
>>>>> Also ensure the device is fully initialized before calling
>>>>> misc_register().  This means setting up some list linkage before the
>>>>> call, and tearing it down if there is an error.
>>>> I have been testing this but I'm no HSM expert. I pushed this patch
>>>> to OpenSFS branch as well.
>>>>
>>>> https://jira.whamcloud.com/browse/LU-11617
>>>> https://review.whamcloud.com/#/c/33572/
>>>>
>>>> Reviewed-by: James Simmons <jsimmons@infradead.org>
>>> Reviewed-by: Quentin Bouget <quentin.bouget@cea.fr>
>>>
>> Thanks to you both for the review.
>>
>> NeilBrown
>>
> Wait! I just realised there might be another issue!
> I think there is now a race between chlg_open() and 
> mdc_changelog_cdev_finish().

Hmmm.. yes.  Also another deadlock possibility as the locks can be taken
in the wrong order here too.

>
> Wait! I just realised there might be another bigger issue!
> The whole "take the first obd you can find" is broken! I opened a ticket 
> <https://jira.whamcloud.com/browse/LU-11626> on whamcloud's JIRA about it.

Yep...
My guess is that chlg_load(), chlg_clear() and (possibly)
chlg_read_cat_process_cb() should take the mutex, choose an obd, used
it, and release the mutex.

I might post an RFC patch.

NeilBrown


>
> Quentin Bouget
NeilBrown Nov. 7, 2018, 12:43 a.m. UTC | #6
On Wed, Nov 07 2018, NeilBrown wrote:

> On Tue, Nov 06 2018, quentin.bouget@cea.fr wrote:
>
>> Le 06/11/2018 à 04:11, NeilBrown a écrit :
>>> On Mon, Nov 05 2018, quentin.bouget@cea.fr wrote:
>>>
>>>> Le 04/11/2018 à 22:34, James Simmons a écrit :
>>>>>> Lockdep reports a possible deadlock between chlg_open() and
>>>>>> mdc_changelog_cdev_init()
>>>>>>
>>>>>> mdc_changelog_cdev_init() takes chlg_registered_dev_lock and then
>>>>>> calls misc_register() which takes misc_mtx.
>>>>>> chlg_open() is called while misc_mtx is held, and tries to take
>>>>>> chlg_registered_dev_lock.
>>>>>> If these two functions race, a deadlock can occur as each thread will
>>>>>> hold one of the locks while trying to take the other.
>>>>>>
>>>>>> chlg_open() does not need to take a lock.  It only uses the
>>>>>> lock to stablize a list while looking for the matching
>>>>>> chlg_registered_dev, and this can be found directly by examining
>>>>>> file->private_data.
>>>>>>
>>>>>> So remove chlg_obd_get(), and use file->private_data to find the
>>>>>> obd_device.
>>>>>> Also ensure the device is fully initialized before calling
>>>>>> misc_register().  This means setting up some list linkage before the
>>>>>> call, and tearing it down if there is an error.
>>>>> I have been testing this but I'm no HSM expert. I pushed this patch
>>>>> to OpenSFS branch as well.
>>>>>
>>>>> https://jira.whamcloud.com/browse/LU-11617
>>>>> https://review.whamcloud.com/#/c/33572/
>>>>>
>>>>> Reviewed-by: James Simmons <jsimmons@infradead.org>
>>>> Reviewed-by: Quentin Bouget <quentin.bouget@cea.fr>
>>>>
>>> Thanks to you both for the review.
>>>
>>> NeilBrown
>>>
>> Wait! I just realised there might be another issue!
>> I think there is now a race between chlg_open() and 
>> mdc_changelog_cdev_finish().
>
> Hmmm.. yes.  Also another deadlock possibility as the locks can be taken
> in the wrong order here too.

Sorry, I got that wrong - there is no other deadlock.
mdc_changelog_cdev_finish() can call misc_deregister() while holding
chlg_registered_dev_lock, and misc_deregister() will take misc_mtx, but
that is the right way around, not deadlock-causing.

NeilBrown

>
>>
>> Wait! I just realised there might be another bigger issue!
>> The whole "take the first obd you can find" is broken! I opened a ticket 
>> <https://jira.whamcloud.com/browse/LU-11626> on whamcloud's JIRA about it.
>
> Yep...
> My guess is that chlg_load(), chlg_clear() and (possibly)
> chlg_read_cat_process_cb() should take the mutex, choose an obd, used
> it, and release the mutex.
>
> I might post an RFC patch.
>
> NeilBrown
>
>
>>
>> Quentin Bouget
diff mbox series

Patch

diff --git a/drivers/staging/lustre/lustre/mdc/mdc_changelog.c b/drivers/staging/lustre/lustre/mdc/mdc_changelog.c
index d83507cbf95c..af29ea73c48a 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_changelog.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_changelog.c
@@ -444,31 +444,6 @@  static ssize_t chlg_write(struct file *file, const char __user *buff,
 	return rc < 0 ? rc : count;
 }
 
-/**
- * Find the OBD device associated to a changelog character device.
- * @param[in]  cdev  character device instance descriptor
- * @return corresponding OBD device or NULL if none was found.
- */
-static struct obd_device *chlg_obd_get(dev_t cdev)
-{
-	int minor = MINOR(cdev);
-	struct obd_device *obd = NULL;
-	struct chlg_registered_dev *curr;
-
-	mutex_lock(&chlg_registered_dev_lock);
-	list_for_each_entry(curr, &chlg_registered_devices, ced_link) {
-		if (curr->ced_misc.minor == minor) {
-			/* take the first available OBD device attached */
-			obd = list_first_entry(&curr->ced_obds,
-					       struct obd_device,
-					       u.cli.cl_chg_dev_linkage);
-			break;
-		}
-	}
-	mutex_unlock(&chlg_registered_dev_lock);
-	return obd;
-}
-
 /**
  * Open handler, initialize internal CRS state and spawn prefetch thread if
  * needed.
@@ -479,12 +454,16 @@  static struct obd_device *chlg_obd_get(dev_t cdev)
 static int chlg_open(struct inode *inode, struct file *file)
 {
 	struct chlg_reader_state *crs;
-	struct obd_device *obd = chlg_obd_get(inode->i_rdev);
+	struct miscdevice *misc = file->private_data;
+	struct chlg_registered_dev *dev;
+	struct obd_device *obd;
 	struct task_struct *task;
 	int rc;
 
-	if (!obd)
-		return -ENODEV;
+	dev = container_of(misc, struct chlg_registered_dev, ced_misc);
+	obd = list_first_entry(&dev->ced_obds,
+			       struct obd_device,
+			       u.cli.cl_chg_dev_linkage);
 
 	crs = kzalloc(sizeof(*crs), GFP_KERNEL);
 	if (!crs)
@@ -669,13 +648,16 @@  int mdc_changelog_cdev_init(struct obd_device *obd)
 		goto out_unlock;
 	}
 
+	list_add_tail(&obd->u.cli.cl_chg_dev_linkage, &entry->ced_obds);
+	list_add_tail(&entry->ced_link, &chlg_registered_devices);
+
 	/* Register new character device */
 	rc = misc_register(&entry->ced_misc);
-	if (rc != 0)
+	if (rc != 0) {
+		list_del_init(&obd->u.cli.cl_chg_dev_linkage);
+		list_del(&entry->ced_link);
 		goto out_unlock;
-
-	list_add_tail(&obd->u.cli.cl_chg_dev_linkage, &entry->ced_obds);
-	list_add_tail(&entry->ced_link, &chlg_registered_devices);
+	}
 
 	entry = NULL;	/* prevent it from being freed below */