diff mbox series

[OSSTEST,3/5] TCP fix: Do not wait for ownerdaemon to speak

Message ID 20200928131241.30278-4-iwj@xenproject.org (mailing list archive)
State New, archived
Headers show
Series Fix TCP problem | expand

Commit Message

Ian Jackson Sept. 28, 2020, 1:12 p.m. UTC
From: Ian Jackson <ian.jackson@eu.citrix.com>

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
 tcl/JobDB-Executive.tcl | 13 +++++++++++++
 1 file changed, 13 insertions(+)
diff mbox series

Patch

diff --git a/tcl/JobDB-Executive.tcl b/tcl/JobDB-Executive.tcl
index 29c82821..4fe85696 100644
--- a/tcl/JobDB-Executive.tcl
+++ b/tcl/JobDB-Executive.tcl
@@ -414,7 +414,20 @@  proc become-task {comment} {
 
     set ownerqueue [socket $c(OwnerDaemonHost) $c(OwnerDaemonPort)]
     fconfigure $ownerqueue -buffering line -translation lf
+
+    # TCP connections can get into a weird state where the client
+    # thinks the connection is open but the server has no record
+    # of it.  To avoid this, have the client speak without waiting
+    # for the server.  We tolerate "unknown command" errors so
+    # that it is not necessary to restart the ownerdaemon since
+    # that is very disruptive.
+    #
+    # See "A TCP 'stuck' connection mystery":
+    # https://www.evanjones.ca/tcp-stuck-connection-mystery.html
+    puts $ownerqueue noop
     must-gets $ownerqueue {^OK ms-ownerdaemon\M}
+    must-gets $ownerqueue {^OK noop|^ERROR unknown command}
+
     puts $ownerqueue create-task
     must-gets $ownerqueue {^OK created-task (\d+) (\w+ [\[\]:.0-9a-f]+)$} \
         taskid refinfo