diff mbox series

[11/26] trace-cmd: Move accepting tsync connection out of tracecmd_tsync_with_host()

Message ID 20220514024756.1319681-12-rostedt@goodmis.org (mailing list archive)
State Accepted
Commit af9cb7315036be1e8b3afedd71ce21fd938f8609
Headers show
Series trace-cmd: Add agent proxy (agent on the host) | expand

Commit Message

Steven Rostedt May 14, 2022, 2:47 a.m. UTC
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

In preparation for adding an agent proxy, move the acceptance of the tsync
socket out of tracecmd_tsync_with_host(). This will allow the agent to do
the accept and then act like a host.

A side effect of this is that it fixes an existing problem. Currently, if
the host fails to connect to the agent for time synchronization, the tsync
thread never continues and stays stuck at the "accept()". This also hangs
the agent when it tries to join that thread. The recording on the host side
would work as normal, but the agent process would be left stuck, and this
could cause a leak of processes.

By accepting before the sync is started, the agent does not continue when
the host fails to connect, which also makes the host side fail (this is a
good thing), so the issue is detected right at the beginning.

This also requires moving the tracecmd_msg_send_trace_resp() before the
synchronization, otherwise there would be a deadlock (with the agent
waiting for the record to connect, and the record waiting for a response
from the agent).

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 lib/trace-cmd/trace-timesync.c | 21 +----------------
 tracecmd/trace-agent.c         | 41 +++++++++++++++++++++++++++-------
 2 files changed, 34 insertions(+), 28 deletions(-)
diff mbox series

Patch

diff --git a/lib/trace-cmd/trace-timesync.c b/lib/trace-cmd/trace-timesync.c
index 966aa56c5cc0..e192a74a1974 100644
--- a/lib/trace-cmd/trace-timesync.c
+++ b/lib/trace-cmd/trace-timesync.c
@@ -938,28 +938,9 @@  int tracecmd_tsync_with_guest_stop(struct tracecmd_time_sync *tsync)
 static void *tsync_agent_thread(void *data)
 {
 	struct tracecmd_time_sync *tsync = data;
-	long ret = 0;
-	int sd;
-
-	while (true) {
-		tracecmd_debug("Listening on fd:%d\n", tsync->msg_handle->fd);
-		sd = accept(tsync->msg_handle->fd, NULL, NULL);
-		tracecmd_debug("Accepted fd:%d\n", sd);
-		if (sd < 0) {
-			if (errno == EINTR)
-				continue;
-			ret = -1;
-			goto out;
-		}
-		break;
-	}
-	close(tsync->msg_handle->fd);
-	tsync->msg_handle->fd = sd;
 
 	tsync_with_host(tsync);
-
-out:
-	pthread_exit((void *)ret);
+	pthread_exit(NULL);
 }
 
 /**
diff --git a/tracecmd/trace-agent.c b/tracecmd/trace-agent.c
index 2fe31f71e47a..7ee5fc8352c6 100644
--- a/tracecmd/trace-agent.c
+++ b/tracecmd/trace-agent.c
@@ -122,6 +122,28 @@  static void trace_print_connection(int fd, const char *network)
 		tracecmd_debug("Could not print connection fd:%d\n", fd);
 }
 
+static int wait_for_connection(int fd)
+{
+	int sd;
+
+	if (fd < 0)
+		return -1;
+
+	while (true) {
+		tracecmd_debug("Listening on fd:%d\n", fd);
+		sd = accept(fd, NULL, NULL);
+		tracecmd_debug("Accepted fd:%d\n", sd);
+		if (sd < 0) {
+			if (errno == EINTR)
+				continue;
+			return -1;
+		}
+		break;
+	}
+	close(fd);
+	return sd;
+}
+
 static void agent_handle(int sd, int nr_cpus, int page_size, const char *network)
 {
 	struct tracecmd_tsync_protos *tsync_protos = NULL;
@@ -186,23 +208,26 @@  static void agent_handle(int sd, int nr_cpus, int page_size, const char *network
 				fd = -1;
 			}
 		}
-		if (fd >= 0) {
+	}
+	trace_id = tracecmd_generate_traceid();
+	ret = tracecmd_msg_send_trace_resp(msg_handle, nr_cpus, page_size,
+					   ports, use_fifos, trace_id,
+					   tsync_proto, tsync_port);
+	if (ret < 0)
+		die("Failed to send trace response");
+
+	if (tsync_proto) {
+		fd = wait_for_connection(fd);
+		if (fd >= 0)
 			tsync = tracecmd_tsync_with_host(fd, tsync_proto,
 							 get_clock(argc, argv),
 							 remote_id, local_id);
-		}
 		if (!tsync) {
 			warning("Failed to negotiate timestamps synchronization with the host");
 			if (fd >= 0)
 				close(fd);
 		}
 	}
-	trace_id = tracecmd_generate_traceid();
-	ret = tracecmd_msg_send_trace_resp(msg_handle, nr_cpus, page_size,
-					   ports, use_fifos, trace_id,
-					   tsync_proto, tsync_port);
-	if (ret < 0)
-		die("Failed to send trace response");
 
 	trace_record_agent(msg_handle, nr_cpus, fds, argc, argv,
 			   use_fifos, trace_id, network);