[2/3] drm/scheduler: Don't call wait_event_killable for signaled process.
diff mbox

Message ID 662c84bf-ac38-db28-1a11-b17719c9b8d0@daenzer.net
State New
Headers show

Commit Message

Michel Dänzer May 17, 2018, 2:48 p.m. UTC
On 2018-05-17 01:18 PM, Andrey Grodzovsky wrote:
> Hi Michele and others, I am trying to implement the approach bellow to
> resolve AMDGPU's hang when commands are stuck in pipe during process exit.
> 
> I noticed that once I implemented the file_operation.flush callback 
> then during run of X, i see the flush callback gets called not only for
> Xorg process but for other
> 
> processes such as 'xkbcomp' and even 'sh', it seems like Xorg passes his
> FDs to children, Christian mentioned he remembered a discussion to
> always set FD_CLOEXEC flag when opening the hardware device file, so
> 
> we suspect a bug in Xorg with regard to this behavior.

Try the libdrm patch below.

Note that the X server passes DRM file descriptors to DRI3 clients.

Comments

Grodzovsky, Andrey May 17, 2018, 3:33 p.m. UTC | #1
Thanks Michel, will give it a try.

BTW, just out of interest, how the FDs are passed to clients ? Using 
sockets ? Can you point me to the code which does it ?

Andrey


On 05/17/2018 10:48 AM, Michel Dänzer wrote:
> On 2018-05-17 01:18 PM, Andrey Grodzovsky wrote:
>> Hi Michele and others, I am trying to implement the approach bellow to
>> resolve AMDGPU's hang when commands are stuck in pipe during process exit.
>>
>> I noticed that once I implemented the file_operation.flush callback
>> then during run of X, i see the flush callback gets called not only for
>> Xorg process but for other
>>
>> processes such as 'xkbcomp' and even 'sh', it seems like Xorg passes his
>> FDs to children, Christian mentioned he remembered a discussion to
>> always set FD_CLOEXEC flag when opening the hardware device file, so
>>
>> we suspect a bug in Xorg with regard to this behavior.
> Try the libdrm patch below.
>
> Note that the X server passes DRM file descriptors to DRI3 clients.
>
>
> diff --git a/xf86drm.c b/xf86drm.c
> index 3a9d0ed2..c09437b0 100644
> --- a/xf86drm.c
> +++ b/xf86drm.c
> @@ -405,7 +405,7 @@ wait_for_udev:
>       }
>   #endif
>
> -    fd = open(buf, O_RDWR, 0);
> +    fd = open(buf, O_RDWR | O_CLOEXEC, 0);
>       drmMsg("drmOpenDevice: open result is %d, (%s)\n",
>              fd, fd < 0 ? strerror(errno) : "OK");
>       if (fd >= 0)
> @@ -425,7 +425,7 @@ wait_for_udev:
>               chmod(buf, devmode);
>           }
>       }
> -    fd = open(buf, O_RDWR, 0);
> +    fd = open(buf, O_RDWR | O_CLOEXEC, 0);
>       drmMsg("drmOpenDevice: open result is %d, (%s)\n",
>              fd, fd < 0 ? strerror(errno) : "OK");
>       if (fd >= 0)
> @@ -474,7 +474,7 @@ static int drmOpenMinor(int minor, int create, int type)
>       };
>
>       sprintf(buf, dev_name, DRM_DIR_NAME, minor);
> -    if ((fd = open(buf, O_RDWR, 0)) >= 0)
> +    if ((fd = open(buf, O_RDWR | O_CLOEXEC, 0)) >= 0)
>           return fd;
>       return -errno;
>   }
>
>
>
Michel Dänzer May 17, 2018, 3:52 p.m. UTC | #2
On 2018-05-17 05:33 PM, Andrey Grodzovsky wrote:
> 
> BTW, just out of interest, how the FDs are passed to clients ? Using
> sockets ?

Yes, via the socket used for the X11 display connection.


> Can you point me to the code which does it ?

xserver/dri3/dri3_request.c:dri3_send_open_reply() =>
xserver/os/io.c:WriteFdToClient()

Note that since dri3_send_open_reply passes TRUE for WriteFdToClient's
do_close parameter, the file descriptor is closed in the Xorg process
after sending it to the client.
Grodzovsky, Andrey May 17, 2018, 7:05 p.m. UTC | #3
On 05/17/2018 10:48 AM, Michel Dänzer wrote:
> On 2018-05-17 01:18 PM, Andrey Grodzovsky wrote:
>> Hi Michele and others, I am trying to implement the approach bellow to
>> resolve AMDGPU's hang when commands are stuck in pipe during process exit.
>>
>> I noticed that once I implemented the file_operation.flush callback
>> then during run of X, i see the flush callback gets called not only for
>> Xorg process but for other
>>
>> processes such as 'xkbcomp' and even 'sh', it seems like Xorg passes his
>> FDs to children, Christian mentioned he remembered a discussion to
>> always set FD_CLOEXEC flag when opening the hardware device file, so
>>
>> we suspect a bug in Xorg with regard to this behavior.
> Try the libdrm patch below.
>
> Note that the X server passes DRM file descriptors to DRI3 clients.

Tried it, didn't help. I still see other processes calling .flush for 
/dev/dri/card0

Thanks,
Andrey

>
>
> diff --git a/xf86drm.c b/xf86drm.c
> index 3a9d0ed2..c09437b0 100644
> --- a/xf86drm.c
> +++ b/xf86drm.c
> @@ -405,7 +405,7 @@ wait_for_udev:
>       }
>   #endif
>
> -    fd = open(buf, O_RDWR, 0);
> +    fd = open(buf, O_RDWR | O_CLOEXEC, 0);
>       drmMsg("drmOpenDevice: open result is %d, (%s)\n",
>              fd, fd < 0 ? strerror(errno) : "OK");
>       if (fd >= 0)
> @@ -425,7 +425,7 @@ wait_for_udev:
>               chmod(buf, devmode);
>           }
>       }
> -    fd = open(buf, O_RDWR, 0);
> +    fd = open(buf, O_RDWR | O_CLOEXEC, 0);
>       drmMsg("drmOpenDevice: open result is %d, (%s)\n",
>              fd, fd < 0 ? strerror(errno) : "OK");
>       if (fd >= 0)
> @@ -474,7 +474,7 @@ static int drmOpenMinor(int minor, int create, int type)
>       };
>
>       sprintf(buf, dev_name, DRM_DIR_NAME, minor);
> -    if ((fd = open(buf, O_RDWR, 0)) >= 0)
> +    if ((fd = open(buf, O_RDWR | O_CLOEXEC, 0)) >= 0)
>           return fd;
>       return -errno;
>   }
>
>
>

Patch
diff mbox

diff --git a/xf86drm.c b/xf86drm.c
index 3a9d0ed2..c09437b0 100644
--- a/xf86drm.c
+++ b/xf86drm.c
@@ -405,7 +405,7 @@  wait_for_udev:
     }
 #endif

-    fd = open(buf, O_RDWR, 0);
+    fd = open(buf, O_RDWR | O_CLOEXEC, 0);
     drmMsg("drmOpenDevice: open result is %d, (%s)\n",
            fd, fd < 0 ? strerror(errno) : "OK");
     if (fd >= 0)
@@ -425,7 +425,7 @@  wait_for_udev:
             chmod(buf, devmode);
         }
     }
-    fd = open(buf, O_RDWR, 0);
+    fd = open(buf, O_RDWR | O_CLOEXEC, 0);
     drmMsg("drmOpenDevice: open result is %d, (%s)\n",
            fd, fd < 0 ? strerror(errno) : "OK");
     if (fd >= 0)
@@ -474,7 +474,7 @@  static int drmOpenMinor(int minor, int create, int type)
     };

     sprintf(buf, dev_name, DRM_DIR_NAME, minor);
-    if ((fd = open(buf, O_RDWR, 0)) >= 0)
+    if ((fd = open(buf, O_RDWR | O_CLOEXEC, 0)) >= 0)
         return fd;
     return -errno;
 }