diff mbox series

[v2] tpm: Rework open/close/shutdown to avoid races

Message ID 20201215133801.546207-1-s.temerkhanov@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2] tpm: Rework open/close/shutdown to avoid races | expand

Commit Message

Sergey Temerkhanov Dec. 15, 2020, 1:38 p.m. UTC
Avoid race condition at shutdown by shutting downn the TPM 2.0
devices synchronously. This eliminates the condition when the
shutdown sequence sets chip->ops to NULL leading to the following:

[ 1586.593561][ T8669] tpm2_del_space+0x28/0x73
[ 1586.598718][ T8669] tpmrm_release+0x27/0x33wq
[ 1586.603774][ T8669] __fput+0x109/0x1d
[ 1586.608380][ T8669] task_work_run+0x7c/0x90
[ 1586.613414][ T8669] prepare_exit_to_usermode+0xb8/0x128
[ 1586.619522][ T8669] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1586.626068][ T8669] RIP: 0033:0x4cb4bb

Signed-off-by: Sergey Temerkhanov <s.temerkhanov@gmail.com>
---
 drivers/char/tpm/tpm-chip.c  |  2 ++
 drivers/char/tpm/tpm-dev.c   | 20 +++++++++++++-------
 drivers/char/tpm/tpmrm-dev.c |  3 +++
 include/linux/tpm.h          |  6 ++++--
 4 files changed, 22 insertions(+), 9 deletions(-)

Comments

kernel test robot Dec. 15, 2020, 5:08 p.m. UTC | #1
Hi Sergey,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on char-misc/char-misc-testing]
[also build test WARNING on linux/master linus/master v5.10 next-20201215]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Sergey-Temerkhanov/tpm-Rework-open-close-shutdown-to-avoid-races/20201215-214304
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git 93f998879cd95b3e4f2836e7b17d6d5ae035cf90
config: m68k-randconfig-r012-20201215 (attached as .config)
compiler: m68k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/b1f6f05f320b6b609ff70567a701e12504783b02
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Sergey-Temerkhanov/tpm-Rework-open-close-shutdown-to-avoid-races/20201215-214304
        git checkout b1f6f05f320b6b609ff70567a701e12504783b02
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/char/tpm/tpm-dev.c: In function 'tpm_open':
>> drivers/char/tpm/tpm-dev.c:22:6: warning: variable 'ret' set but not used [-Wunused-but-set-variable]
      22 |  int ret = 0;
         |      ^~~

vim +/ret +22 drivers/char/tpm/tpm-dev.c

    17	
    18	static int tpm_open(struct inode *inode, struct file *file)
    19	{
    20		struct tpm_chip *chip;
    21		struct file_priv *priv;
  > 22		int ret = 0;
    23	
    24		chip = container_of(inode->i_cdev, struct tpm_chip, cdev);
    25	
    26		/* It's assured that the chip will be opened just once,
    27		 * by the check of the chip reference count.
    28		 */
    29		if (atomic_fetch_inc(&chip->refcount)) {
    30			dev_dbg(&chip->dev, "Another process owns this TPM\n");
    31			ret = -EBUSY;
    32			goto out;
    33		}
    34	
    35		priv = kzalloc(sizeof(*priv), GFP_KERNEL);
    36		if (priv == NULL) {
    37			ret = -ENOMEM;
    38			goto out;
    39		}
    40	
    41		tpm_common_open(file, chip, priv, NULL);
    42	
    43		return 0;
    44	
    45	 out:
    46		atomic_dec(&chip->refcount);
    47		wake_up_all(&chip->waitq);
    48		return -ENOMEM;
    49	}
    50	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Jason Gunthorpe Dec. 15, 2020, 5:56 p.m. UTC | #2
On Tue, Dec 15, 2020 at 04:38:01PM +0300, Sergey Temerkhanov wrote:
> Avoid race condition at shutdown by shutting downn the TPM 2.0
> devices synchronously. This eliminates the condition when the
> shutdown sequence sets chip->ops to NULL leading to the following:
> 
> [ 1586.593561][ T8669] tpm2_del_space+0x28/0x73
> [ 1586.598718][ T8669] tpmrm_release+0x27/0x33wq
> [ 1586.603774][ T8669] __fput+0x109/0x1d
> [ 1586.608380][ T8669] task_work_run+0x7c/0x90
> [ 1586.613414][ T8669] prepare_exit_to_usermode+0xb8/0x128
> [ 1586.619522][ T8669] entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [ 1586.626068][ T8669] RIP: 0033:0x4cb4bb
> 
> Signed-off-by: Sergey Temerkhanov <s.temerkhanov@gmail.com>
>  drivers/char/tpm/tpm-chip.c  |  2 ++
>  drivers/char/tpm/tpm-dev.c   | 20 +++++++++++++-------
>  drivers/char/tpm/tpmrm-dev.c |  3 +++
>  include/linux/tpm.h          |  6 ++++--
>  4 files changed, 22 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
> index ddaeceb7e109..e94148b8e180 100644
> +++ b/drivers/char/tpm/tpm-chip.c
> @@ -295,6 +295,7 @@ static int tpm_class_shutdown(struct device *dev)
>  {
>  	struct tpm_chip *chip = container_of(dev, struct tpm_chip, dev);
>  
> +	wait_event_idle(chip->waitq, !atomic_read(&chip->refcount));
>  	down_write(&chip->ops_sem);
>  	if (chip->flags & TPM_CHIP_FLAG_TPM2) {
>  		if (!tpm_chip_start(chip)) {
> @@ -330,6 +331,7 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
>  
>  	mutex_init(&chip->tpm_mutex);
>  	init_rwsem(&chip->ops_sem);
> +	init_waitqueue_head(&chip->waitq);
>  
>  	chip->ops = ops;
>  
> diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c
> index e2c0baa69fef..8558f0f7382c 100644
> +++ b/drivers/char/tpm/tpm-dev.c
> @@ -19,27 +19,32 @@ static int tpm_open(struct inode *inode, struct file *file)
>  {
>  	struct tpm_chip *chip;
>  	struct file_priv *priv;
> +	int ret = 0;
>  
>  	chip = container_of(inode->i_cdev, struct tpm_chip, cdev);
>  
>  	/* It's assured that the chip will be opened just once,
> -	 * by the check of is_open variable, which is protected
> -	 * by driver_lock. */
> -	if (test_and_set_bit(0, &chip->is_open)) {
> +	 * by the check of the chip reference count.
> +	 */
> +	if (atomic_fetch_inc(&chip->refcount)) {

Use a refcount_t for all this

> @@ -39,6 +40,8 @@ static int tpmrm_release(struct inode *inode, struct file *file)
>
>  	tpm_common_release(file, fpriv);
>  	tpm2_del_space(fpriv->chip, &priv->space);
> +	atomic_dec(&fpriv->chip->refcount);
> +	wake_up_all(&fpriv->chip->waitq);

The usual pattern is

        if (refcount_dec_and_test(&fpriv->chip->refcount))
	   wake_up_all(&fpriv->chip->waitq);

But this seems like madness, this blocks tpm_class_shutdown until
userspace closes a file descriptor, can't do it.

You need to have tpm_class_shutdown() remove the ops from the still
open FD and have that FD start returning -EIO when the ops are gone,
which is what the ops lock is already for.

Jason
James Bottomley Dec. 15, 2020, 6:51 p.m. UTC | #3
On Tue, 2020-12-15 at 16:38 +0300, Sergey Temerkhanov wrote:
> Avoid race condition at shutdown by shutting downn the TPM 2.0
> devices synchronously. This eliminates the condition when the
> shutdown sequence sets chip->ops to NULL leading to the following:
> 
> [ 1586.593561][ T8669] tpm2_del_space+0x28/0x73
> [ 1586.598718][ T8669] tpmrm_release+0x27/0x33wq
> [ 1586.603774][ T8669] __fput+0x109/0x1d
> [ 1586.608380][ T8669] task_work_run+0x7c/0x90
> [ 1586.613414][ T8669] prepare_exit_to_usermode+0xb8/0x128
> [ 1586.619522][ T8669] entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [ 1586.626068][ T8669] RIP: 0033:0x4cb4bb

An actual bug report would have been helpful.  However, from this trace
it's easy to deduce that tpm2_del_space() didn't get converted to the
get/put of the chip ops ... it's still trying to do its own half arsed
thing with tpm_chip_start() and the mutex.  So isn't a much simpler fix
simply to convert it as below?  compile tested only, but if you can
test it out I'll send a proper patch.

James

---

diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c
index 784b8b3cb903..0c0cd225046f 100644
--- a/drivers/char/tpm/tpm2-space.c
+++ b/drivers/char/tpm/tpm2-space.c
@@ -58,12 +58,12 @@ int tpm2_init_space(struct tpm_space *space, unsigned int buf_size)
 
 void tpm2_del_space(struct tpm_chip *chip, struct tpm_space *space)
 {
-	mutex_lock(&chip->tpm_mutex);
-	if (!tpm_chip_start(chip)) {
+
+	if (tpm_try_get_ops(chip) == 0) {
 		tpm2_flush_sessions(chip, space);
-		tpm_chip_stop(chip);
+		tpm_put_ops(chip);
 	}
-	mutex_unlock(&chip->tpm_mutex);
+
 	kfree(space->context_buf);
 	kfree(space->session_buf);
 }
James Bottomley Dec. 17, 2020, 9:05 p.m. UTC | #4
On Tue, 2020-12-15 at 10:51 -0800, James Bottomley wrote:
> On Tue, 2020-12-15 at 16:38 +0300, Sergey Temerkhanov wrote:
> > Avoid race condition at shutdown by shutting downn the TPM 2.0
> > devices synchronously. This eliminates the condition when the
> > shutdown sequence sets chip->ops to NULL leading to the following:
> > 
> > [ 1586.593561][ T8669] tpm2_del_space+0x28/0x73
> > [ 1586.598718][ T8669] tpmrm_release+0x27/0x33wq
> > [ 1586.603774][ T8669] __fput+0x109/0x1d
> > [ 1586.608380][ T8669] task_work_run+0x7c/0x90
> > [ 1586.613414][ T8669] prepare_exit_to_usermode+0xb8/0x128
> > [ 1586.619522][ T8669] entry_SYSCALL_64_after_hwframe+0x44/0xa9
> > [ 1586.626068][ T8669] RIP: 0033:0x4cb4bb
> 
> An actual bug report would have been helpful.  However, from this
> trace it's easy to deduce that tpm2_del_space() didn't get converted
> to the get/put of the chip ops ... it's still trying to do its own
> half arsed thing with tpm_chip_start() and the mutex.  So isn't a
> much simpler fix simply to convert it as below?  compile tested only,
> but if you can test it out I'll send a proper patch.

I got this booted and running here, so I know it works.  What I still
need to know is does it fix your problem?

James
diff mbox series

Patch

diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index ddaeceb7e109..e94148b8e180 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -295,6 +295,7 @@  static int tpm_class_shutdown(struct device *dev)
 {
 	struct tpm_chip *chip = container_of(dev, struct tpm_chip, dev);
 
+	wait_event_idle(chip->waitq, !atomic_read(&chip->refcount));
 	down_write(&chip->ops_sem);
 	if (chip->flags & TPM_CHIP_FLAG_TPM2) {
 		if (!tpm_chip_start(chip)) {
@@ -330,6 +331,7 @@  struct tpm_chip *tpm_chip_alloc(struct device *pdev,
 
 	mutex_init(&chip->tpm_mutex);
 	init_rwsem(&chip->ops_sem);
+	init_waitqueue_head(&chip->waitq);
 
 	chip->ops = ops;
 
diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c
index e2c0baa69fef..8558f0f7382c 100644
--- a/drivers/char/tpm/tpm-dev.c
+++ b/drivers/char/tpm/tpm-dev.c
@@ -19,27 +19,32 @@  static int tpm_open(struct inode *inode, struct file *file)
 {
 	struct tpm_chip *chip;
 	struct file_priv *priv;
+	int ret = 0;
 
 	chip = container_of(inode->i_cdev, struct tpm_chip, cdev);
 
 	/* It's assured that the chip will be opened just once,
-	 * by the check of is_open variable, which is protected
-	 * by driver_lock. */
-	if (test_and_set_bit(0, &chip->is_open)) {
+	 * by the check of the chip reference count.
+	 */
+	if (atomic_fetch_inc(&chip->refcount)) {
 		dev_dbg(&chip->dev, "Another process owns this TPM\n");
-		return -EBUSY;
+		ret = -EBUSY;
+		goto out;
 	}
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (priv == NULL)
+	if (priv == NULL) {
+		ret = -ENOMEM;
 		goto out;
+	}
 
 	tpm_common_open(file, chip, priv, NULL);
 
 	return 0;
 
  out:
-	clear_bit(0, &chip->is_open);
+	atomic_dec(&chip->refcount);
+	wake_up_all(&chip->waitq);
 	return -ENOMEM;
 }
 
@@ -51,7 +56,8 @@  static int tpm_release(struct inode *inode, struct file *file)
 	struct file_priv *priv = file->private_data;
 
 	tpm_common_release(file, priv);
-	clear_bit(0, &priv->chip->is_open);
+	atomic_dec(&priv->chip->refcount);
+	wake_up_all(&priv->chip->waitq);
 	kfree(priv);
 
 	return 0;
diff --git a/drivers/char/tpm/tpmrm-dev.c b/drivers/char/tpm/tpmrm-dev.c
index eef0fb06ea83..fb3cb7b03814 100644
--- a/drivers/char/tpm/tpmrm-dev.c
+++ b/drivers/char/tpm/tpmrm-dev.c
@@ -28,6 +28,7 @@  static int tpmrm_open(struct inode *inode, struct file *file)
 	}
 
 	tpm_common_open(file, chip, &priv->priv, &priv->space);
+	atomic_inc(&chip->refcount);
 
 	return 0;
 }
@@ -39,6 +40,8 @@  static int tpmrm_release(struct inode *inode, struct file *file)
 
 	tpm_common_release(file, fpriv);
 	tpm2_del_space(fpriv->chip, &priv->space);
+	atomic_dec(&fpriv->chip->refcount);
+	wake_up_all(&fpriv->chip->waitq);
 	kfree(priv);
 
 	return 0;
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 8f4ff39f51e7..0c8842783823 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -22,6 +22,7 @@ 
 #include <linux/cdev.h>
 #include <linux/fs.h>
 #include <linux/highmem.h>
+#include <linux/atomic.h>
 #include <crypto/hash_info.h>
 
 #define TPM_DIGEST_SIZE 20	/* Max TPM v1.2 PCR size */
@@ -128,8 +129,9 @@  struct tpm_chip {
 
 	unsigned int flags;
 
-	int dev_num;		/* /dev/tpm# */
-	unsigned long is_open;	/* only one allowed */
+	int dev_num;		 /* /dev/tpm# */
+	atomic_t refcount;	 /* /dev/tmp# can only be opened once */
+	wait_queue_head_t waitq; /* Wait queue for synchronous ops */
 
 	char hwrng_name[64];
 	struct hwrng hwrng;