diff mbox series

drm/i915/perf: Do not clear pollin for small user read buffers

Message ID 98393a3bbd2652886d895e1c3250e43c6e0f1a24.1585160174.git.ashutosh.dixit@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/perf: Do not clear pollin for small user read buffers | expand

Commit Message

Dixit, Ashutosh March 25, 2020, 6:20 p.m. UTC
It is wrong to block the user thread in the next poll when OA data is
already available which could not fit in the user buffer provided in
the previous read. In several cases the exact user buffer size is not
known. Blocking user space in poll can lead to data loss when the
buffer size used is smaller than the available data.

This change fixes this issue and allows user space to read all OA data
even when using a buffer size smaller than the available data using
multiple non-blocking reads rather than staying blocked in poll till
the next timer interrupt.

Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 62 ++++++--------------------------
 1 file changed, 11 insertions(+), 51 deletions(-)

Comments

Lionel Landwerlin March 25, 2020, 7:25 p.m. UTC | #1
On 25/03/2020 20:20, Ashutosh Dixit wrote:
> It is wrong to block the user thread in the next poll when OA data is
> already available which could not fit in the user buffer provided in
> the previous read. In several cases the exact user buffer size is not
> known. Blocking user space in poll can lead to data loss when the
> buffer size used is smaller than the available data.
>
> This change fixes this issue and allows user space to read all OA data
> even when using a buffer size smaller than the available data using
> multiple non-blocking reads rather than staying blocked in poll till
> the next timer interrupt.


Looks like you found a pretty important issue.

Can you write an IGT test case so that we don't run into it again?


Thanks a lot,


-Lionel


>
> Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_perf.c | 62 ++++++--------------------------
>   1 file changed, 11 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 3222f6cd8255..c1a47c030941 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -2957,49 +2957,6 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
>   		gen8_update_reg_state_unlocked(ce, stream);
>   }
>   
> -/**
> - * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
> - * @stream: An i915 perf stream
> - * @file: An i915 perf stream file
> - * @buf: destination buffer given by userspace
> - * @count: the number of bytes userspace wants to read
> - * @ppos: (inout) file seek position (unused)
> - *
> - * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
> - * ensure that if we've successfully copied any data then reporting that takes
> - * precedence over any internal error status, so the data isn't lost.
> - *
> - * For example ret will be -ENOSPC whenever there is more buffered data than
> - * can be copied to userspace, but that's only interesting if we weren't able
> - * to copy some data because it implies the userspace buffer is too small to
> - * receive a single record (and we never split records).
> - *
> - * Another case with ret == -EFAULT is more of a grey area since it would seem
> - * like bad form for userspace to ask us to overrun its buffer, but the user
> - * knows best:
> - *
> - *   http://yarchive.net/comp/linux/partial_reads_writes.html
> - *
> - * Returns: The number of bytes copied or a negative error code on failure.
> - */
> -static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
> -				     struct file *file,
> -				     char __user *buf,
> -				     size_t count,
> -				     loff_t *ppos)
> -{
> -	/* Note we keep the offset (aka bytes read) separate from any
> -	 * error status so that the final check for whether we return
> -	 * the bytes read with a higher precedence than any error (see
> -	 * comment below) doesn't need to be handled/duplicated in
> -	 * stream->ops->read() implementations.
> -	 */
> -	size_t offset = 0;
> -	int ret = stream->ops->read(stream, buf, count, &offset);
> -
> -	return offset ?: (ret ?: -EAGAIN);
> -}
> -
>   /**
>    * i915_perf_read - handles read() FOP for i915 perf stream FDs
>    * @file: An i915 perf stream file
> @@ -3025,6 +2982,8 @@ static ssize_t i915_perf_read(struct file *file,
>   {
>   	struct i915_perf_stream *stream = file->private_data;
>   	struct i915_perf *perf = stream->perf;
> +	size_t offset = 0;
> +	int __ret;
>   	ssize_t ret;
>   
>   	/* To ensure it's handled consistently we simply treat all reads of a
> @@ -3048,16 +3007,18 @@ static ssize_t i915_perf_read(struct file *file,
>   				return ret;
>   
>   			mutex_lock(&perf->lock);
> -			ret = i915_perf_read_locked(stream, file,
> -						    buf, count, ppos);
> +			__ret = stream->ops->read(stream, buf, count, &offset);
>   			mutex_unlock(&perf->lock);
>   		} while (ret == -EAGAIN);
>   	} else {
>   		mutex_lock(&perf->lock);
> -		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
> +		__ret = stream->ops->read(stream, buf, count, &offset);
>   		mutex_unlock(&perf->lock);
>   	}
>   
> +	/* Possible values for __ret are 0, -EFAULT, -ENOSPC, -EAGAIN, ... */
> +	ret = offset ?: (__ret ?: -EAGAIN);
> +
>   	/* We allow the poll checking to sometimes report false positive EPOLLIN
>   	 * events where we might actually report EAGAIN on read() if there's
>   	 * not really any data available. In this situation though we don't
> @@ -3065,13 +3026,12 @@ static ssize_t i915_perf_read(struct file *file,
>   	 * and read() returning -EAGAIN. Clearing the oa.pollin state here
>   	 * effectively ensures we back off until the next hrtimer callback
>   	 * before reporting another EPOLLIN event.
> +	 * The exception to this is if ops->read() returned -ENOSPC which means
> +	 * that more OA data is available than could fit in the user provided
> +	 * buffer. In this case we want the next poll() call to not block.
>   	 */
> -	if (ret >= 0 || ret == -EAGAIN) {
> -		/* Maybe make ->pollin per-stream state if we support multiple
> -		 * concurrent streams in the future.
> -		 */
> +	if ((ret > 0 || ret == -EAGAIN) && __ret != -ENOSPC)
>   		stream->pollin = false;
> -	}
>   
>   	return ret;
>   }
Dixit, Ashutosh March 25, 2020, 8:44 p.m. UTC | #2
On Wed, 25 Mar 2020 12:25:59 -0700, Lionel Landwerlin wrote:
>
> On 25/03/2020 20:20, Ashutosh Dixit wrote:
> > It is wrong to block the user thread in the next poll when OA data is
> > already available which could not fit in the user buffer provided in
> > the previous read. In several cases the exact user buffer size is not
> > known. Blocking user space in poll can lead to data loss when the
> > buffer size used is smaller than the available data.
> >
> > This change fixes this issue and allows user space to read all OA data
> > even when using a buffer size smaller than the available data using
> > multiple non-blocking reads rather than staying blocked in poll till
> > the next timer interrupt.
>
> Looks like you found a pretty important issue.
>
> Can you write an IGT test case so that we don't run into it again?

OK I'll see if I can come up with an IGT.

Thanks!
--
Ashutosh
Umesh Nerlige Ramappa March 26, 2020, 12:32 a.m. UTC | #3
On Wed, Mar 25, 2020 at 11:20:19AM -0700, Ashutosh Dixit wrote:
>It is wrong to block the user thread in the next poll when OA data is
>already available which could not fit in the user buffer provided in
>the previous read. In several cases the exact user buffer size is not
>known. Blocking user space in poll can lead to data loss when the
>buffer size used is smaller than the available data.
>
>This change fixes this issue and allows user space to read all OA data
>even when using a buffer size smaller than the available data using
>multiple non-blocking reads rather than staying blocked in poll till
>the next timer interrupt.
>
>Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>---
> drivers/gpu/drm/i915/i915_perf.c | 62 ++++++--------------------------
> 1 file changed, 11 insertions(+), 51 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>index 3222f6cd8255..c1a47c030941 100644
>--- a/drivers/gpu/drm/i915/i915_perf.c
>+++ b/drivers/gpu/drm/i915/i915_perf.c
>@@ -2957,49 +2957,6 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
> 		gen8_update_reg_state_unlocked(ce, stream);
> }
>
>-/**
>- * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
>- * @stream: An i915 perf stream
>- * @file: An i915 perf stream file
>- * @buf: destination buffer given by userspace
>- * @count: the number of bytes userspace wants to read
>- * @ppos: (inout) file seek position (unused)
>- *
>- * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
>- * ensure that if we've successfully copied any data then reporting that takes
>- * precedence over any internal error status, so the data isn't lost.
>- *
>- * For example ret will be -ENOSPC whenever there is more buffered data than
>- * can be copied to userspace, but that's only interesting if we weren't able
>- * to copy some data because it implies the userspace buffer is too small to
>- * receive a single record (and we never split records).
>- *
>- * Another case with ret == -EFAULT is more of a grey area since it would seem
>- * like bad form for userspace to ask us to overrun its buffer, but the user
>- * knows best:
>- *
>- *   http://yarchive.net/comp/linux/partial_reads_writes.html
>- *
>- * Returns: The number of bytes copied or a negative error code on failure.
>- */
>-static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
>-				     struct file *file,
>-				     char __user *buf,
>-				     size_t count,
>-				     loff_t *ppos)
>-{
>-	/* Note we keep the offset (aka bytes read) separate from any
>-	 * error status so that the final check for whether we return
>-	 * the bytes read with a higher precedence than any error (see
>-	 * comment below) doesn't need to be handled/duplicated in
>-	 * stream->ops->read() implementations.
>-	 */
>-	size_t offset = 0;
>-	int ret = stream->ops->read(stream, buf, count, &offset);
>-
>-	return offset ?: (ret ?: -EAGAIN);
>-}
>-
> /**
>  * i915_perf_read - handles read() FOP for i915 perf stream FDs
>  * @file: An i915 perf stream file
>@@ -3025,6 +2982,8 @@ static ssize_t i915_perf_read(struct file *file,
> {
> 	struct i915_perf_stream *stream = file->private_data;
> 	struct i915_perf *perf = stream->perf;
>+	size_t offset = 0;
>+	int __ret;
> 	ssize_t ret;
>
> 	/* To ensure it's handled consistently we simply treat all reads of a
>@@ -3048,16 +3007,18 @@ static ssize_t i915_perf_read(struct file *file,
> 				return ret;
>
> 			mutex_lock(&perf->lock);
>-			ret = i915_perf_read_locked(stream, file,
>-						    buf, count, ppos);
>+			__ret = stream->ops->read(stream, buf, count, &offset);
> 			mutex_unlock(&perf->lock);
> 		} while (ret == -EAGAIN);

ret will never be EAGAIN here in the while. EAGAIN was returned by the 
deleted function in this patch if offset and ret are both 0. Although I 
don't see how that would be true.

> 	} else {
> 		mutex_lock(&perf->lock);
>-		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
>+		__ret = stream->ops->read(stream, buf, count, &offset);
> 		mutex_unlock(&perf->lock);
> 	}
>
>+	/* Possible values for __ret are 0, -EFAULT, -ENOSPC, -EAGAIN, ... */

__ret may never be EAGAIN either (comment^). I don't see EAGAIN in the 
read path. 

That said, EAGAIN seems to have been introduced in the prior code 
specifically for retrying the blocking read and may not have much 
meaning otherwise.

Thanks,
Umesh

>+	ret = offset ?: (__ret ?: -EAGAIN);
>+
> 	/* We allow the poll checking to sometimes report false positive EPOLLIN
> 	 * events where we might actually report EAGAIN on read() if there's
> 	 * not really any data available. In this situation though we don't
>@@ -3065,13 +3026,12 @@ static ssize_t i915_perf_read(struct file *file,
> 	 * and read() returning -EAGAIN. Clearing the oa.pollin state here
> 	 * effectively ensures we back off until the next hrtimer callback
> 	 * before reporting another EPOLLIN event.
>+	 * The exception to this is if ops->read() returned -ENOSPC which means
>+	 * that more OA data is available than could fit in the user provided
>+	 * buffer. In this case we want the next poll() call to not block.
> 	 */
>-	if (ret >= 0 || ret == -EAGAIN) {
>-		/* Maybe make ->pollin per-stream state if we support multiple
>-		 * concurrent streams in the future.
>-		 */
>+	if ((ret > 0 || ret == -EAGAIN) && __ret != -ENOSPC)
> 		stream->pollin = false;
>-	}
>
> 	return ret;
> }
>-- 
>2.25.2
>
Dixit, Ashutosh March 26, 2020, 1:52 a.m. UTC | #4
On Wed, 25 Mar 2020 17:32:35 -0700, Umesh Nerlige Ramappa wrote:
>
> On Wed, Mar 25, 2020 at 11:20:19AM -0700, Ashutosh Dixit wrote:
> > It is wrong to block the user thread in the next poll when OA data is
> > already available which could not fit in the user buffer provided in
> > the previous read. In several cases the exact user buffer size is not
> > known. Blocking user space in poll can lead to data loss when the
> > buffer size used is smaller than the available data.
> >
> > This change fixes this issue and allows user space to read all OA data
> > even when using a buffer size smaller than the available data using
> > multiple non-blocking reads rather than staying blocked in poll till
> > the next timer interrupt.
> >
> > Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_perf.c | 62 ++++++--------------------------
> > 1 file changed, 11 insertions(+), 51 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> > index 3222f6cd8255..c1a47c030941 100644
> > --- a/drivers/gpu/drm/i915/i915_perf.c
> > +++ b/drivers/gpu/drm/i915/i915_perf.c
> > @@ -2957,49 +2957,6 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
> >		gen8_update_reg_state_unlocked(ce, stream);
> > }
> >
> > -/**
> > - * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
> > - * @stream: An i915 perf stream
> > - * @file: An i915 perf stream file
> > - * @buf: destination buffer given by userspace
> > - * @count: the number of bytes userspace wants to read
> > - * @ppos: (inout) file seek position (unused)
> > - *
> > - * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
> > - * ensure that if we've successfully copied any data then reporting that takes
> > - * precedence over any internal error status, so the data isn't lost.
> > - *
> > - * For example ret will be -ENOSPC whenever there is more buffered data than
> > - * can be copied to userspace, but that's only interesting if we weren't able
> > - * to copy some data because it implies the userspace buffer is too small to
> > - * receive a single record (and we never split records).
> > - *
> > - * Another case with ret == -EFAULT is more of a grey area since it would seem
> > - * like bad form for userspace to ask us to overrun its buffer, but the user
> > - * knows best:
> > - *
> > - *   http://yarchive.net/comp/linux/partial_reads_writes.html
> > - *
> > - * Returns: The number of bytes copied or a negative error code on failure.
> > - */
> > -static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
> > -				     struct file *file,
> > -				     char __user *buf,
> > -				     size_t count,
> > -				     loff_t *ppos)
> > -{
> > -	/* Note we keep the offset (aka bytes read) separate from any
> > -	 * error status so that the final check for whether we return
> > -	 * the bytes read with a higher precedence than any error (see
> > -	 * comment below) doesn't need to be handled/duplicated in
> > -	 * stream->ops->read() implementations.
> > -	 */
> > -	size_t offset = 0;
> > -	int ret = stream->ops->read(stream, buf, count, &offset);
> > -
> > -	return offset ?: (ret ?: -EAGAIN);
> > -}
> > -
> > /**
> >  * i915_perf_read - handles read() FOP for i915 perf stream FDs
> >  * @file: An i915 perf stream file
> > @@ -3025,6 +2982,8 @@ static ssize_t i915_perf_read(struct file *file,
> > {
> >	struct i915_perf_stream *stream = file->private_data;
> >	struct i915_perf *perf = stream->perf;
> > +	size_t offset = 0;
> > +	int __ret;
> >	ssize_t ret;
> >
> >	/* To ensure it's handled consistently we simply treat all reads of a
> > @@ -3048,16 +3007,18 @@ static ssize_t i915_perf_read(struct file *file,
> >				return ret;
> >
> >			mutex_lock(&perf->lock);
> > -			ret = i915_perf_read_locked(stream, file,
> > -						    buf, count, ppos);
> > +			__ret = stream->ops->read(stream, buf, count, &offset);
> >			mutex_unlock(&perf->lock);
> >		} while (ret == -EAGAIN);
>
> ret will never be EAGAIN here in the while. EAGAIN was returned by the
> deleted function in this patch if offset and ret are both 0.

Good catch, I was so focussed on the non-blocking case that I missed the
blocking case.

> Although I don't see how that would be true.

As you say above, the old function i915_perf_read_locked() was doing this:

	return offset ?: (__ret ?: -EAGAIN);

So -EAGAIN is returned from i915_perf_read_locked() when there is no data
to read but otherwise there is no other error. Since this is blocking read
we cannot return -EAGAIN to user space (since there is no data to read), we
must go back and block again. That is the purpose of the while loop. I
broke this logic in this patch and will need to fix this.

>
> >	} else {
> >		mutex_lock(&perf->lock);
> > -		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
> > +		__ret = stream->ops->read(stream, buf, count, &offset);
> >		mutex_unlock(&perf->lock);
> >	}
> >
> > +	/* Possible values for __ret are 0, -EFAULT, -ENOSPC, -EAGAIN, ... */
>
> __ret may never be EAGAIN either (comment^). I don't see EAGAIN in the read
> path.

It's here:

gen8_append_oa_reports()
{

        /*
         * An invalid tail pointer here means we're still waiting for the poll
         * hrtimer callback to give us a pointer
         */
        if (tail == INVALID_TAIL_PTR)
                return -EAGAIN;
}

> That said, EAGAIN seems to have been introduced in the prior code
> specifically for retrying the blocking read and may not have much meaning
> otherwise.

No that's not true. The kernel non-blocking read() function (in fops)
returns -EAGAIN when there is no data to read (the function never returns 0
except in case of EOF, in i915 perf code there is no EOF so read never
returns 0). This logic is the same as that in the previous code and we need
to preserve it.

Will post a v2 with the fix.

Thanks!
--
Ashutosh


>
> Thanks,
> Umesh
>
> > +	ret = offset ?: (__ret ?: -EAGAIN);
> > +
> >	/* We allow the poll checking to sometimes report false positive EPOLLIN
> >	 * events where we might actually report EAGAIN on read() if there's
> >	 * not really any data available. In this situation though we don't
> > @@ -3065,13 +3026,12 @@ static ssize_t i915_perf_read(struct file *file,
> >	 * and read() returning -EAGAIN. Clearing the oa.pollin state here
> >	 * effectively ensures we back off until the next hrtimer callback
> >	 * before reporting another EPOLLIN event.
> > +	 * The exception to this is if ops->read() returned -ENOSPC which means
> > +	 * that more OA data is available than could fit in the user provided
> > +	 * buffer. In this case we want the next poll() call to not block.
> >	 */
> > -	if (ret >= 0 || ret == -EAGAIN) {
> > -		/* Maybe make ->pollin per-stream state if we support multiple
> > -		 * concurrent streams in the future.
> > -		 */
> > +	if ((ret > 0 || ret == -EAGAIN) && __ret != -ENOSPC)
> >		stream->pollin = false;
> > -	}
> >
> >	return ret;
> > }
> > --
> > 2.25.2
> >
Umesh Nerlige Ramappa March 26, 2020, 6:02 p.m. UTC | #5
On Wed, Mar 25, 2020 at 06:52:52PM -0700, Dixit, Ashutosh wrote:
>On Wed, 25 Mar 2020 17:32:35 -0700, Umesh Nerlige Ramappa wrote:
>>
>> On Wed, Mar 25, 2020 at 11:20:19AM -0700, Ashutosh Dixit wrote:
>> > It is wrong to block the user thread in the next poll when OA data is
>> > already available which could not fit in the user buffer provided in
>> > the previous read. In several cases the exact user buffer size is not
>> > known. Blocking user space in poll can lead to data loss when the
>> > buffer size used is smaller than the available data.
>> >
>> > This change fixes this issue and allows user space to read all OA data
>> > even when using a buffer size smaller than the available data using
>> > multiple non-blocking reads rather than staying blocked in poll till
>> > the next timer interrupt.
>> >
>> > Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>> > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>> > ---
>> > drivers/gpu/drm/i915/i915_perf.c | 62 ++++++--------------------------
>> > 1 file changed, 11 insertions(+), 51 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> > index 3222f6cd8255..c1a47c030941 100644
>> > --- a/drivers/gpu/drm/i915/i915_perf.c
>> > +++ b/drivers/gpu/drm/i915/i915_perf.c
>> > @@ -2957,49 +2957,6 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
>> >		gen8_update_reg_state_unlocked(ce, stream);
>> > }
>> >
>> > -/**
>> > - * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
>> > - * @stream: An i915 perf stream
>> > - * @file: An i915 perf stream file
>> > - * @buf: destination buffer given by userspace
>> > - * @count: the number of bytes userspace wants to read
>> > - * @ppos: (inout) file seek position (unused)
>> > - *
>> > - * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
>> > - * ensure that if we've successfully copied any data then reporting that takes
>> > - * precedence over any internal error status, so the data isn't lost.
>> > - *
>> > - * For example ret will be -ENOSPC whenever there is more buffered data than
>> > - * can be copied to userspace, but that's only interesting if we weren't able
>> > - * to copy some data because it implies the userspace buffer is too small to
>> > - * receive a single record (and we never split records).
>> > - *
>> > - * Another case with ret == -EFAULT is more of a grey area since it would seem
>> > - * like bad form for userspace to ask us to overrun its buffer, but the user
>> > - * knows best:
>> > - *
>> > - *   http://yarchive.net/comp/linux/partial_reads_writes.html
>> > - *
>> > - * Returns: The number of bytes copied or a negative error code on failure.
>> > - */
>> > -static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
>> > -				     struct file *file,
>> > -				     char __user *buf,
>> > -				     size_t count,
>> > -				     loff_t *ppos)
>> > -{
>> > -	/* Note we keep the offset (aka bytes read) separate from any
>> > -	 * error status so that the final check for whether we return
>> > -	 * the bytes read with a higher precedence than any error (see
>> > -	 * comment below) doesn't need to be handled/duplicated in
>> > -	 * stream->ops->read() implementations.
>> > -	 */
>> > -	size_t offset = 0;
>> > -	int ret = stream->ops->read(stream, buf, count, &offset);
>> > -
>> > -	return offset ?: (ret ?: -EAGAIN);
>> > -}
>> > -
>> > /**
>> >  * i915_perf_read - handles read() FOP for i915 perf stream FDs
>> >  * @file: An i915 perf stream file
>> > @@ -3025,6 +2982,8 @@ static ssize_t i915_perf_read(struct file *file,
>> > {
>> >	struct i915_perf_stream *stream = file->private_data;
>> >	struct i915_perf *perf = stream->perf;
>> > +	size_t offset = 0;
>> > +	int __ret;
>> >	ssize_t ret;
>> >
>> >	/* To ensure it's handled consistently we simply treat all reads of a
>> > @@ -3048,16 +3007,18 @@ static ssize_t i915_perf_read(struct file *file,
>> >				return ret;
>> >
>> >			mutex_lock(&perf->lock);
>> > -			ret = i915_perf_read_locked(stream, file,
>> > -						    buf, count, ppos);
>> > +			__ret = stream->ops->read(stream, buf, count, &offset);
>> >			mutex_unlock(&perf->lock);
>> >		} while (ret == -EAGAIN);
>>
>> ret will never be EAGAIN here in the while. EAGAIN was returned by the
>> deleted function in this patch if offset and ret are both 0.
>
>Good catch, I was so focussed on the non-blocking case that I missed the
>blocking case.
>
>> Although I don't see how that would be true.
>
>As you say above, the old function i915_perf_read_locked() was doing this:
>
>	return offset ?: (__ret ?: -EAGAIN);
>
>So -EAGAIN is returned from i915_perf_read_locked() when there is no data
>to read but otherwise there is no other error. Since this is blocking read
>we cannot return -EAGAIN to user space (since there is no data to read), we
>must go back and block again. That is the purpose of the while loop. I
>broke this logic in this patch and will need to fix this.
>
>>
>> >	} else {
>> >		mutex_lock(&perf->lock);
>> > -		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
>> > +		__ret = stream->ops->read(stream, buf, count, &offset);
>> >		mutex_unlock(&perf->lock);
>> >	}
>> >
>> > +	/* Possible values for __ret are 0, -EFAULT, -ENOSPC, -EAGAIN, ... */
>>
>> __ret may never be EAGAIN either (comment^). I don't see EAGAIN in the read
>> path.
>
>It's here:
>
>gen8_append_oa_reports()
>{
>
>        /*
>         * An invalid tail pointer here means we're still waiting for the poll
>         * hrtimer callback to give us a pointer
>         */
>        if (tail == INVALID_TAIL_PTR)
>                return -EAGAIN;
>}

Oh, you are right, EAGAIN is returned here. I was looking for it with 
the poll period patch series applied and these references are removed in 
that series.

Thanks,
Umesh

>
>> That said, EAGAIN seems to have been introduced in the prior code
>> specifically for retrying the blocking read and may not have much meaning
>> otherwise.
>
>No that's not true. The kernel non-blocking read() function (in fops)
>returns -EAGAIN when there is no data to read (the function never returns 0
>except in case of EOF, in i915 perf code there is no EOF so read never
>returns 0). This logic is the same as that in the previous code and we need
>to preserve it.
>
>Will post a v2 with the fix.
>
>Thanks!
>--
>Ashutosh
>
>
>>
>> Thanks,
>> Umesh
>>
>> > +	ret = offset ?: (__ret ?: -EAGAIN);
>> > +
>> >	/* We allow the poll checking to sometimes report false positive EPOLLIN
>> >	 * events where we might actually report EAGAIN on read() if there's
>> >	 * not really any data available. In this situation though we don't
>> > @@ -3065,13 +3026,12 @@ static ssize_t i915_perf_read(struct file *file,
>> >	 * and read() returning -EAGAIN. Clearing the oa.pollin state here
>> >	 * effectively ensures we back off until the next hrtimer callback
>> >	 * before reporting another EPOLLIN event.
>> > +	 * The exception to this is if ops->read() returned -ENOSPC which means
>> > +	 * that more OA data is available than could fit in the user provided
>> > +	 * buffer. In this case we want the next poll() call to not block.
>> >	 */
>> > -	if (ret >= 0 || ret == -EAGAIN) {
>> > -		/* Maybe make ->pollin per-stream state if we support multiple
>> > -		 * concurrent streams in the future.
>> > -		 */
>> > +	if ((ret > 0 || ret == -EAGAIN) && __ret != -ENOSPC)
>> >		stream->pollin = false;
>> > -	}
>> >
>> >	return ret;
>> > }
>> > --
>> > 2.25.2
>> >
Dixit, Ashutosh March 27, 2020, 1:28 a.m. UTC | #6
On Thu, 26 Mar 2020 11:02:46 -0700, Umesh Nerlige Ramappa wrote:
> On Wed, Mar 25, 2020 at 06:52:52PM -0700, Dixit, Ashutosh wrote:
> > On Wed, 25 Mar 2020 17:32:35 -0700, Umesh Nerlige Ramappa wrote:
> >>
> >> On Wed, Mar 25, 2020 at 11:20:19AM -0700, Ashutosh Dixit wrote:
> >> >
> >> > +	/* Possible values for __ret are 0, -EFAULT, -ENOSPC, -EAGAIN, ... */
> >>
> >> __ret may never be EAGAIN either (comment^). I don't see EAGAIN in the read
> >> path.
> >
> > It's here:
> >
> > gen8_append_oa_reports()
> > {
> >
> >        /*
> >         * An invalid tail pointer here means we're still waiting for the poll
> >         * hrtimer callback to give us a pointer
> >         */
> >        if (tail == INVALID_TAIL_PTR)
> >                return -EAGAIN;
> > }
>
> Oh, you are right, EAGAIN is returned here. I was looking for it with the
> poll period patch series applied and these references are removed in that
> series.

No, you are right, since that series is likely to be merged first, it is
better to remove it from this patch.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3222f6cd8255..c1a47c030941 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2957,49 +2957,6 @@  void i915_oa_init_reg_state(const struct intel_context *ce,
 		gen8_update_reg_state_unlocked(ce, stream);
 }
 
-/**
- * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
- * @stream: An i915 perf stream
- * @file: An i915 perf stream file
- * @buf: destination buffer given by userspace
- * @count: the number of bytes userspace wants to read
- * @ppos: (inout) file seek position (unused)
- *
- * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
- * ensure that if we've successfully copied any data then reporting that takes
- * precedence over any internal error status, so the data isn't lost.
- *
- * For example ret will be -ENOSPC whenever there is more buffered data than
- * can be copied to userspace, but that's only interesting if we weren't able
- * to copy some data because it implies the userspace buffer is too small to
- * receive a single record (and we never split records).
- *
- * Another case with ret == -EFAULT is more of a grey area since it would seem
- * like bad form for userspace to ask us to overrun its buffer, but the user
- * knows best:
- *
- *   http://yarchive.net/comp/linux/partial_reads_writes.html
- *
- * Returns: The number of bytes copied or a negative error code on failure.
- */
-static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
-				     struct file *file,
-				     char __user *buf,
-				     size_t count,
-				     loff_t *ppos)
-{
-	/* Note we keep the offset (aka bytes read) separate from any
-	 * error status so that the final check for whether we return
-	 * the bytes read with a higher precedence than any error (see
-	 * comment below) doesn't need to be handled/duplicated in
-	 * stream->ops->read() implementations.
-	 */
-	size_t offset = 0;
-	int ret = stream->ops->read(stream, buf, count, &offset);
-
-	return offset ?: (ret ?: -EAGAIN);
-}
-
 /**
  * i915_perf_read - handles read() FOP for i915 perf stream FDs
  * @file: An i915 perf stream file
@@ -3025,6 +2982,8 @@  static ssize_t i915_perf_read(struct file *file,
 {
 	struct i915_perf_stream *stream = file->private_data;
 	struct i915_perf *perf = stream->perf;
+	size_t offset = 0;
+	int __ret;
 	ssize_t ret;
 
 	/* To ensure it's handled consistently we simply treat all reads of a
@@ -3048,16 +3007,18 @@  static ssize_t i915_perf_read(struct file *file,
 				return ret;
 
 			mutex_lock(&perf->lock);
-			ret = i915_perf_read_locked(stream, file,
-						    buf, count, ppos);
+			__ret = stream->ops->read(stream, buf, count, &offset);
 			mutex_unlock(&perf->lock);
 		} while (ret == -EAGAIN);
 	} else {
 		mutex_lock(&perf->lock);
-		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
+		__ret = stream->ops->read(stream, buf, count, &offset);
 		mutex_unlock(&perf->lock);
 	}
 
+	/* Possible values for __ret are 0, -EFAULT, -ENOSPC, -EAGAIN, ... */
+	ret = offset ?: (__ret ?: -EAGAIN);
+
 	/* We allow the poll checking to sometimes report false positive EPOLLIN
 	 * events where we might actually report EAGAIN on read() if there's
 	 * not really any data available. In this situation though we don't
@@ -3065,13 +3026,12 @@  static ssize_t i915_perf_read(struct file *file,
 	 * and read() returning -EAGAIN. Clearing the oa.pollin state here
 	 * effectively ensures we back off until the next hrtimer callback
 	 * before reporting another EPOLLIN event.
+	 * The exception to this is if ops->read() returned -ENOSPC which means
+	 * that more OA data is available than could fit in the user provided
+	 * buffer. In this case we want the next poll() call to not block.
 	 */
-	if (ret >= 0 || ret == -EAGAIN) {
-		/* Maybe make ->pollin per-stream state if we support multiple
-		 * concurrent streams in the future.
-		 */
+	if ((ret > 0 || ret == -EAGAIN) && __ret != -ENOSPC)
 		stream->pollin = false;
-	}
 
 	return ret;
 }