drm/i915/perf: Do not clear pollin for small user read buffers
diff mbox series

Message ID 20200331052222.76525-1-ashutosh.dixit@intel.com
State New
Headers show
Series
  • drm/i915/perf: Do not clear pollin for small user read buffers
Related show

Commit Message

Dixit, Ashutosh March 31, 2020, 5:22 a.m. UTC
It is wrong to block the user thread in the next poll when OA data is
already available which could not fit in the user buffer provided in
the previous read. In several cases the exact user buffer size is not
known. Blocking user space in poll can lead to data loss when the
buffer size used is smaller than the available data.

This change fixes this issue and allows user space to read all OA data
even when using a buffer size smaller than the available data using
multiple non-blocking reads rather than staying blocked in poll till
the next timer interrupt.

v2: Fix ret value for blocking reads (Umesh)
v3: Mistake during patch send (Ashutosh)
v4: Remove -EAGAIN from comment (Umesh)
v5: Improve condition for clearing pollin and return (Lionel)

Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 62 +++++++-------------------------
 1 file changed, 13 insertions(+), 49 deletions(-)

Comments

Lionel Landwerlin March 31, 2020, 7:34 a.m. UTC | #1
On 31/03/2020 08:22, Ashutosh Dixit wrote:
> It is wrong to block the user thread in the next poll when OA data is
> already available which could not fit in the user buffer provided in
> the previous read. In several cases the exact user buffer size is not
> known. Blocking user space in poll can lead to data loss when the
> buffer size used is smaller than the available data.
>
> This change fixes this issue and allows user space to read all OA data
> even when using a buffer size smaller than the available data using
> multiple non-blocking reads rather than staying blocked in poll till
> the next timer interrupt.
>
> v2: Fix ret value for blocking reads (Umesh)
> v3: Mistake during patch send (Ashutosh)
> v4: Remove -EAGAIN from comment (Umesh)
> v5: Improve condition for clearing pollin and return (Lionel)
>
> Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>


I forgot to mention this needs to be Cc: stable.

Still one nit below which should make the remaining function a bit simpler.


Thanks for your time.


-Lionel


> ---
>   drivers/gpu/drm/i915/i915_perf.c | 62 +++++++-------------------------
>   1 file changed, 13 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index c74ebac50015..9c21f28f89a7 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -2914,49 +2914,6 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
>   		gen8_update_reg_state_unlocked(ce, stream);
>   }
>   
> -/**
> - * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
> - * @stream: An i915 perf stream
> - * @file: An i915 perf stream file
> - * @buf: destination buffer given by userspace
> - * @count: the number of bytes userspace wants to read
> - * @ppos: (inout) file seek position (unused)
> - *
> - * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
> - * ensure that if we've successfully copied any data then reporting that takes
> - * precedence over any internal error status, so the data isn't lost.
> - *
> - * For example ret will be -ENOSPC whenever there is more buffered data than
> - * can be copied to userspace, but that's only interesting if we weren't able
> - * to copy some data because it implies the userspace buffer is too small to
> - * receive a single record (and we never split records).
> - *
> - * Another case with ret == -EFAULT is more of a grey area since it would seem
> - * like bad form for userspace to ask us to overrun its buffer, but the user
> - * knows best:
> - *
> - *   http://yarchive.net/comp/linux/partial_reads_writes.html
> - *
> - * Returns: The number of bytes copied or a negative error code on failure.
> - */
> -static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
> -				     struct file *file,
> -				     char __user *buf,
> -				     size_t count,
> -				     loff_t *ppos)
> -{
> -	/* Note we keep the offset (aka bytes read) separate from any
> -	 * error status so that the final check for whether we return
> -	 * the bytes read with a higher precedence than any error (see
> -	 * comment below) doesn't need to be handled/duplicated in
> -	 * stream->ops->read() implementations.
> -	 */
> -	size_t offset = 0;
> -	int ret = stream->ops->read(stream, buf, count, &offset);
> -
> -	return offset ?: (ret ?: -EAGAIN);
> -}
> -
>   /**
>    * i915_perf_read - handles read() FOP for i915 perf stream FDs
>    * @file: An i915 perf stream file
> @@ -2982,7 +2939,8 @@ static ssize_t i915_perf_read(struct file *file,
>   {
>   	struct i915_perf_stream *stream = file->private_data;
>   	struct i915_perf *perf = stream->perf;
> -	ssize_t ret;
> +	size_t offset = 0;
> +	int __ret;
>   
>   	/* To ensure it's handled consistently we simply treat all reads of a
>   	 * disabled stream as an error. In particular it might otherwise lead
> @@ -2992,6 +2950,8 @@ static ssize_t i915_perf_read(struct file *file,
>   		return -EIO;
>   
>   	if (!(file->f_flags & O_NONBLOCK)) {
> +		ssize_t ret;
> +
>   		/* There's the small chance of false positives from
>   		 * stream->ops->wait_unlocked.
>   		 *
> @@ -3005,13 +2965,13 @@ static ssize_t i915_perf_read(struct file *file,
>   				return ret;
>   
>   			mutex_lock(&perf->lock);
> -			ret = i915_perf_read_locked(stream, file,
> -						    buf, count, ppos);
> +			__ret = stream->ops->read(stream, buf, count, &offset);


I think you can keep using ret and just change the loop to
while (ret >= 0) (which means no failure).

You will get -ENOSPC when the whole buffer is filled or some other error 
which should trigger stream closure.

Finally you can get 0 if nothing was written but there was nothing to read
and that keeps the read going.


> +			ret = offset ?: (__ret ?: -EAGAIN);
>   			mutex_unlock(&perf->lock);
>   		} while (ret == -EAGAIN);
>   	} else {
>   		mutex_lock(&perf->lock);
> -		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
> +		__ret = stream->ops->read(stream, buf, count, &offset);
>   		mutex_unlock(&perf->lock);
>   	}
>   
> @@ -3022,11 +2982,15 @@ static ssize_t i915_perf_read(struct file *file,
>   	 * and read() returning -EAGAIN. Clearing the oa.pollin state here
>   	 * effectively ensures we back off until the next hrtimer callback
>   	 * before reporting another EPOLLIN event.
> +	 * The exception to this is if ops->read() returned -ENOSPC which means
> +	 * that more OA data is available than could fit in the user provided
> +	 * buffer. In this case we want the next poll() call to not block.
>   	 */
> -	if (ret >= 0 || ret == -EAGAIN)
> +	if (__ret != -ENOSPC)
>   		stream->pollin = false;
>   
> -	return ret;
> +	/* Possible values for __ret are 0, -EFAULT, -ENOSPC, -EIO, ... */
> +	return offset ?: (__ret ?: -EAGAIN);
>   }
>   
>   static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
Dixit, Ashutosh March 31, 2020, 11:29 p.m. UTC | #2
On Tue, 31 Mar 2020 00:34:10 -0700, Lionel Landwerlin wrote:
>
> On 31/03/2020 08:22, Ashutosh Dixit wrote:
> > It is wrong to block the user thread in the next poll when OA data is
> > already available which could not fit in the user buffer provided in
> > the previous read. In several cases the exact user buffer size is not
> > known. Blocking user space in poll can lead to data loss when the
> > buffer size used is smaller than the available data.
> >
> > This change fixes this issue and allows user space to read all OA data
> > even when using a buffer size smaller than the available data using
> > multiple non-blocking reads rather than staying blocked in poll till
> > the next timer interrupt.
> >
> > v2: Fix ret value for blocking reads (Umesh)
> > v3: Mistake during patch send (Ashutosh)
> > v4: Remove -EAGAIN from comment (Umesh)
> > v5: Improve condition for clearing pollin and return (Lionel)
> >
> > Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>
> I forgot to mention this needs to be Cc: stable.

I will Cc stable or send them the patch after it's finalized, hope that
will be ok?

>
> Still one nit below which should make the remaining function a bit simpler.
>
> Thanks for your time.
>
> -Lionel
>
>
> > ---
> >   drivers/gpu/drm/i915/i915_perf.c | 62 +++++++-------------------------
> >   1 file changed, 13 insertions(+), 49 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> > index c74ebac50015..9c21f28f89a7 100644
> > --- a/drivers/gpu/drm/i915/i915_perf.c
> > +++ b/drivers/gpu/drm/i915/i915_perf.c
> > @@ -2914,49 +2914,6 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
> >		gen8_update_reg_state_unlocked(ce, stream);
> >   }
> >   -/**
> > - * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
> > - * @stream: An i915 perf stream
> > - * @file: An i915 perf stream file
> > - * @buf: destination buffer given by userspace
> > - * @count: the number of bytes userspace wants to read
> > - * @ppos: (inout) file seek position (unused)
> > - *
> > - * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
> > - * ensure that if we've successfully copied any data then reporting that takes
> > - * precedence over any internal error status, so the data isn't lost.
> > - *
> > - * For example ret will be -ENOSPC whenever there is more buffered data than
> > - * can be copied to userspace, but that's only interesting if we weren't able
> > - * to copy some data because it implies the userspace buffer is too small to
> > - * receive a single record (and we never split records).
> > - *
> > - * Another case with ret == -EFAULT is more of a grey area since it would seem
> > - * like bad form for userspace to ask us to overrun its buffer, but the user
> > - * knows best:
> > - *
> > - *   http://yarchive.net/comp/linux/partial_reads_writes.html
> > - *
> > - * Returns: The number of bytes copied or a negative error code on failure.
> > - */
> > -static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
> > -				     struct file *file,
> > -				     char __user *buf,
> > -				     size_t count,
> > -				     loff_t *ppos)
> > -{
> > -	/* Note we keep the offset (aka bytes read) separate from any
> > -	 * error status so that the final check for whether we return
> > -	 * the bytes read with a higher precedence than any error (see
> > -	 * comment below) doesn't need to be handled/duplicated in
> > -	 * stream->ops->read() implementations.
> > -	 */
> > -	size_t offset = 0;
> > -	int ret = stream->ops->read(stream, buf, count, &offset);
> > -
> > -	return offset ?: (ret ?: -EAGAIN);
> > -}
> > -
> >   /**
> >    * i915_perf_read - handles read() FOP for i915 perf stream FDs
> >    * @file: An i915 perf stream file
> > @@ -2982,7 +2939,8 @@ static ssize_t i915_perf_read(struct file *file,
> >   {
> >	struct i915_perf_stream *stream = file->private_data;
> >	struct i915_perf *perf = stream->perf;
> > -	ssize_t ret;
> > +	size_t offset = 0;
> > +	int __ret;
> >		/* To ensure it's handled consistently we simply treat all reads of
> > a
> >	 * disabled stream as an error. In particular it might otherwise lead
> > @@ -2992,6 +2950,8 @@ static ssize_t i915_perf_read(struct file *file,
> >		return -EIO;
> >		if (!(file->f_flags & O_NONBLOCK)) {
> > +		ssize_t ret;
> > +
> >		/* There's the small chance of false positives from
> >		 * stream->ops->wait_unlocked.
> >		 *
> > @@ -3005,13 +2965,13 @@ static ssize_t i915_perf_read(struct file *file,
> >				return ret;
> >				mutex_lock(&perf->lock);
> > -			ret = i915_perf_read_locked(stream, file,
> > -						    buf, count, ppos);
> > +			__ret = stream->ops->read(stream, buf, count, &offset);
>
>
> I think you can keep using ret and just change the loop to while (ret >= 0)
> (which means no failure).
>
> You will get -ENOSPC when the whole buffer is filled or some other error
> which should trigger stream closure.

Sorry, but I did not follow you here. Are you saying even when we have data
to return (offset > 0) you want to go back and block (in the
wait_unlocked())? I am not sure if that will be acceptable, I'd think the
expectation would be to return data at the rate of the OA timer.

> Finally you can get 0 if nothing was written but there was nothing to read
> and that keeps the read going.

Here I really lost you. Are you talking about -ENOSPC returning after
blocking multiple times and then resetting the error to 0 somehow?

Could you please look at v6 and let me know what you think? I have
eliminated the second ret variable by changing the do-while loop to "do { }
while (!offset && !ret);" and thus mostly retain the original logic.

Thanks!
--
Ashutosh

>
>
> > +			ret = offset ?: (__ret ?: -EAGAIN);
> >			mutex_unlock(&perf->lock);
> >		} while (ret == -EAGAIN);
> >	} else {
> >		mutex_lock(&perf->lock);
> > -		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
> > +		__ret = stream->ops->read(stream, buf, count, &offset);
> >		mutex_unlock(&perf->lock);
> >	}
> >   @@ -3022,11 +2982,15 @@ static ssize_t i915_perf_read(struct file
> > *file,
> >	 * and read() returning -EAGAIN. Clearing the oa.pollin state here
> >	 * effectively ensures we back off until the next hrtimer callback
> >	 * before reporting another EPOLLIN event.
> > +	 * The exception to this is if ops->read() returned -ENOSPC which means
> > +	 * that more OA data is available than could fit in the user provided
> > +	 * buffer. In this case we want the next poll() call to not block.
> >	 */
> > -	if (ret >= 0 || ret == -EAGAIN)
> > +	if (__ret != -ENOSPC)
> >		stream->pollin = false;
> >   -	return ret;
> > +	/* Possible values for __ret are 0, -EFAULT, -ENOSPC, -EIO, ... */
> > +	return offset ?: (__ret ?: -EAGAIN);
> >   }
> >     static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer
> > *hrtimer)
>
>

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c74ebac50015..9c21f28f89a7 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2914,49 +2914,6 @@  void i915_oa_init_reg_state(const struct intel_context *ce,
 		gen8_update_reg_state_unlocked(ce, stream);
 }
 
-/**
- * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
- * @stream: An i915 perf stream
- * @file: An i915 perf stream file
- * @buf: destination buffer given by userspace
- * @count: the number of bytes userspace wants to read
- * @ppos: (inout) file seek position (unused)
- *
- * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
- * ensure that if we've successfully copied any data then reporting that takes
- * precedence over any internal error status, so the data isn't lost.
- *
- * For example ret will be -ENOSPC whenever there is more buffered data than
- * can be copied to userspace, but that's only interesting if we weren't able
- * to copy some data because it implies the userspace buffer is too small to
- * receive a single record (and we never split records).
- *
- * Another case with ret == -EFAULT is more of a grey area since it would seem
- * like bad form for userspace to ask us to overrun its buffer, but the user
- * knows best:
- *
- *   http://yarchive.net/comp/linux/partial_reads_writes.html
- *
- * Returns: The number of bytes copied or a negative error code on failure.
- */
-static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
-				     struct file *file,
-				     char __user *buf,
-				     size_t count,
-				     loff_t *ppos)
-{
-	/* Note we keep the offset (aka bytes read) separate from any
-	 * error status so that the final check for whether we return
-	 * the bytes read with a higher precedence than any error (see
-	 * comment below) doesn't need to be handled/duplicated in
-	 * stream->ops->read() implementations.
-	 */
-	size_t offset = 0;
-	int ret = stream->ops->read(stream, buf, count, &offset);
-
-	return offset ?: (ret ?: -EAGAIN);
-}
-
 /**
  * i915_perf_read - handles read() FOP for i915 perf stream FDs
  * @file: An i915 perf stream file
@@ -2982,7 +2939,8 @@  static ssize_t i915_perf_read(struct file *file,
 {
 	struct i915_perf_stream *stream = file->private_data;
 	struct i915_perf *perf = stream->perf;
-	ssize_t ret;
+	size_t offset = 0;
+	int __ret;
 
 	/* To ensure it's handled consistently we simply treat all reads of a
 	 * disabled stream as an error. In particular it might otherwise lead
@@ -2992,6 +2950,8 @@  static ssize_t i915_perf_read(struct file *file,
 		return -EIO;
 
 	if (!(file->f_flags & O_NONBLOCK)) {
+		ssize_t ret;
+
 		/* There's the small chance of false positives from
 		 * stream->ops->wait_unlocked.
 		 *
@@ -3005,13 +2965,13 @@  static ssize_t i915_perf_read(struct file *file,
 				return ret;
 
 			mutex_lock(&perf->lock);
-			ret = i915_perf_read_locked(stream, file,
-						    buf, count, ppos);
+			__ret = stream->ops->read(stream, buf, count, &offset);
+			ret = offset ?: (__ret ?: -EAGAIN);
 			mutex_unlock(&perf->lock);
 		} while (ret == -EAGAIN);
 	} else {
 		mutex_lock(&perf->lock);
-		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
+		__ret = stream->ops->read(stream, buf, count, &offset);
 		mutex_unlock(&perf->lock);
 	}
 
@@ -3022,11 +2982,15 @@  static ssize_t i915_perf_read(struct file *file,
 	 * and read() returning -EAGAIN. Clearing the oa.pollin state here
 	 * effectively ensures we back off until the next hrtimer callback
 	 * before reporting another EPOLLIN event.
+	 * The exception to this is if ops->read() returned -ENOSPC which means
+	 * that more OA data is available than could fit in the user provided
+	 * buffer. In this case we want the next poll() call to not block.
 	 */
-	if (ret >= 0 || ret == -EAGAIN)
+	if (__ret != -ENOSPC)
 		stream->pollin = false;
 
-	return ret;
+	/* Possible values for __ret are 0, -EFAULT, -ENOSPC, -EIO, ... */
+	return offset ?: (__ret ?: -EAGAIN);
 }
 
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)