diff mbox series

[net-next,v2,3/3] connector/cn_proc: Selftest for threads

Message ID 20241015173014.1083069-4-anjali.k.kulkarni@oracle.com (mailing list archive)
State New
Headers show
Series Threads support in proc connector | expand

Commit Message

Anjali Kulkarni Oct. 15, 2024, 5:30 p.m. UTC
Test to check that setting PROC_CN_MCAST_NOTIFY in the proc connector API
allows a thread's non-zero exit status to be returned to proc_filter.

The thread.c program creates 2 child threads. The 1st thread handles signal
SIGSEGV, and the 2nd thread needs to indicate some error condition (value 1)
to the kernel, instead of using pthread_exit() with 1.

In both cases, the child calls notify_netlink_thread_exit(exit_code) to let
the kernel know that it has exited abnormally with exit_code.

Compile:
    make thread
    make proc_filter
To see non-zero exit notifications, run:
    ./proc_filter -f
Run the thread program in another window:
    ./thread
Note the 2 child thread IDs reported above
Send SIGSEGV signal to the child handling SIGSEGV:
    kill -11 <child1-tid>
Watch the child 1 tid being notified with exit code 11 to proc_filter
Watch child 2 tid being notified with exit code 1 (value defined in code)
to proc_filter

Signed-off-by: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
---
 tools/testing/selftests/connector/Makefile    |  23 +++-
 .../testing/selftests/connector/proc_filter.c |   5 +
 tools/testing/selftests/connector/thread.c    | 116 ++++++++++++++++++
 .../selftests/connector/thread_filter.c       |  96 +++++++++++++++
 4 files changed, 239 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/connector/thread.c
 create mode 100644 tools/testing/selftests/connector/thread_filter.c

Comments

Liam R. Howlett Oct. 15, 2024, 7:27 p.m. UTC | #1
* Anjali Kulkarni <anjali.k.kulkarni@oracle.com> [241015 13:30]:
> Test to check if setting PROC_CN_MCAST_NOTIFY in proc connector API, allows
> a thread's non-zero exit status to be returned to proc_filter.
> 
> The threads.c program creates 2 child threads. 1st thread handles signal
> SIGSEGV, and 2nd thread needs to indicate some error condition (value 1)
> to the kernel, instead of using pthread_exit() with 1.
> 
> In both cases, child sends notify_netlink_thread_exit(exit_code) to kernel,
> to let kernel know it has exited abnormally with exit_code.
> 
> Compile:
>     make thread
>     make proc_filter
> To see non-zero exit notifications, run:
>     ./proc_filter -f
> Run threads code in another window:
>     ./threads
> Note the 2 child thread IDs reported above
> Send SIGSEGV signal to the child handling SIGSEGV:
>     kill -11 <child1-tid>
> Watch the child 1 tid being notified with exit code 11 to proc_filter
> Watch child 2 tid being notified with exit code 1 (value defined in code)
> to proc_filter
> 
> Signed-off-by: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
> ---
>  tools/testing/selftests/connector/Makefile    |  23 +++-
>  .../testing/selftests/connector/proc_filter.c |   5 +
>  tools/testing/selftests/connector/thread.c    | 116 ++++++++++++++++++
>  .../selftests/connector/thread_filter.c       |  96 +++++++++++++++
>  4 files changed, 239 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/connector/thread.c
>  create mode 100644 tools/testing/selftests/connector/thread_filter.c
> 
> diff --git a/tools/testing/selftests/connector/Makefile b/tools/testing/selftests/connector/Makefile
> index 92188b9bac5c..bf335826bc3b 100644
> --- a/tools/testing/selftests/connector/Makefile
> +++ b/tools/testing/selftests/connector/Makefile
> @@ -1,5 +1,26 @@
>  # SPDX-License-Identifier: GPL-2.0
> -CFLAGS += -Wall $(KHDR_INCLUDES)
> +KERNEL="../../../.."
> +
> +CFLAGS += -Wall $(KHDR_INCLUDES) -I $(KERNEL)/include/uapi -I $(KERNEL)/include
> +
> +proc_filter: proc_filter.o
> +	cc proc_filter.o -o proc_filter
> +
> +proc_filter.o: proc_filter.c
> +	cc -c proc_filter.c -o proc_filter.o $(CFLAGS)
> +
> +thread: thread.o thread_filter.o
> +	cc thread.o thread_filter.o -o thread
> +
> +thread.o: thread.c $(DEPS)
> +		cc -c thread.c -o thread.o $(CFLAGS)
> +
> +thread_filter.o: thread_filter.c
> +		cc -c thread_filter.c -o thread_filter.o $(CFLAGS)
> +
> +define EXTRA_CLEAN
> +	rm *.o thread
> +endef
>  
>  TEST_GEN_PROGS = proc_filter
>  
> diff --git a/tools/testing/selftests/connector/proc_filter.c b/tools/testing/selftests/connector/proc_filter.c
> index 4a825b997666..6fb4842894f8 100644
> --- a/tools/testing/selftests/connector/proc_filter.c
> +++ b/tools/testing/selftests/connector/proc_filter.c
> @@ -1,4 +1,9 @@
>  // SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
> + *
> + * Copyright (c) 2024 Oracle and/or its affiliates.
> + */
>  
>  #include <sys/types.h>
>  #include <sys/epoll.h>
> diff --git a/tools/testing/selftests/connector/thread.c b/tools/testing/selftests/connector/thread.c
> new file mode 100644
> index 000000000000..8c9abf6115d6
> --- /dev/null
> +++ b/tools/testing/selftests/connector/thread.c
> @@ -0,0 +1,116 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
> + *
> + * Copyright (c) 2024 Oracle and/or its affiliates.
> + */
> +
> +#include <pthread.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <signal.h>
> +
> +/*
> + * This code tests a thread exit notification when thread exits abnormally.
> + * Normally, when a thread exits abnormally, the kernel is not aware of the
> + * exit code. This is usually only conveyed from child to parent via the
> + * pthread_exit() and pthread_join() calls. Sometimes, however, a parent
> + * process cannot monitor all child processes via pthread_join(), particularly
> + * when there is a huge amount of child processes. In this case, the parent
> + * has created the child with PTHREAD_CREATE_DETACHED attribute.
> + * To fix this problem, either when child wants to convey non-zero exit via
> + * pthread_exit() or in a signal handler, the child can notify the kernel's
> + * connector module it's exit status via a netlink call with new type
> + * PROC_CN_MCAST_NOTIFY. (Implemented in the thread_filter.c file).
> + * This will send the exit code from the child to the kernel, which the kernel
> + * can later return to proc_filter program when the child actually exits.
> + * To test this usecase:
> + * Compile:
> + *	make thread
> + *	make proc_filter
> + * To see non-zero exit notifications, run:
> + *	./proc_filter -f
> + * Start the threads code, creating 2 threads, in another window:
> + *	./threads
> + * Note the 2 child thread IDs reported above
> + * Send SIGSEGV signal to the child handling SIGSEGV:
> + *	kill -11 <child1-tid>
> + * Watch the event being notified with exit code 11 to proc_filter
> + * Watch child 2 tid being notified with exit code 1 (value defined in code)
> + * to proc_filter
> + */
> +
> +extern int notify_netlink_thread_exit(unsigned int exit_code);
> +
> +static void sigsegvh(int sig)
> +{
> +	unsigned int exit_code = (unsigned int) sig;
> +	/*
> +	 * Send any non-zero value to get a notification. Here we are
> +	 * sending the signal number for SIGSEGV which is 11
> +	 */
> +	notify_netlink_thread_exit(exit_code);
> +}
> +
> +void *threadc1(void *ptr)
> +{
> +	signal(SIGSEGV, sigsegvh);
> +	printf("Child 1 thread id %d, handling SIGSEGV\n", gettid());
> +	sleep(20);
> +	pthread_exit(NULL);
> +}
> +
> +void *threadc2(void *ptr)
> +{
> +	int exit_val = 1;
> +
> +	printf("Child 2 thread id %d, wants to exit with value %d\n",
> +			gettid(), exit_val);
> +	sleep(2);
> +	notify_netlink_thread_exit(exit_val);
> +	pthread_exit(NULL);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	pthread_t thread1, thread2;
> +	pthread_attr_t attr1, attr2;
> +	int ret;
> +
> +	ret = pthread_attr_init(&attr1);
> +	if (ret != 0) {
> +		perror("pthread_attr_init failed");
> +		exit(ret);
> +	}
> +	ret = pthread_attr_setdetachstate(&attr1, PTHREAD_CREATE_DETACHED);
> +	if (ret != 0) {
> +		perror("pthread_attr_setdetachstate failed");
> +		exit(ret);
> +	}
> +	ret = pthread_create(&thread1, &attr1, *threadc1, NULL);
> +	if (ret != 0) {
> +		perror("pthread_create failed");
> +		exit(ret);
> +	}
> +
> +	ret = pthread_attr_init(&attr2);
> +	if (ret != 0) {
> +		perror("pthread_attr_init failed");
> +		exit(ret);
> +	}
> +	ret = pthread_attr_setdetachstate(&attr2, PTHREAD_CREATE_DETACHED);
> +	if (ret != 0) {
> +		perror("pthread_attr_setdetachstate failed");
> +		exit(ret);
> +	}
> +	ret = pthread_create(&thread2, &attr2, *threadc2, NULL);
> +	if (ret != 0) {
> +		perror("pthread_create failed");
> +		exit(ret);
> +	}

I expected the test to check for the correct value to be returned?
Could you use pthread_join() and verify the same value is returned
through the new mechanism, or will they not match?

> +
> +	/* Wait for children to exit or be killed */
> +	sleep(30);
> +	exit(0);
> +}
> diff --git a/tools/testing/selftests/connector/thread_filter.c b/tools/testing/selftests/connector/thread_filter.c
> new file mode 100644
> index 000000000000..3da740aa7537
> --- /dev/null
> +++ b/tools/testing/selftests/connector/thread_filter.c
> @@ -0,0 +1,96 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
> + *
> + * Copyright (c) 2024 Oracle and/or its affiliates.
> + */
> +
> +#include <sys/types.h>
> +#include <sys/epoll.h>
> +#include <sys/socket.h>
> +#include <linux/netlink.h>
> +#include <linux/connector.h>
> +#include <linux/cn_proc.h>
> +
> +#include <stddef.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <strings.h>
> +#include <errno.h>
> +#include <signal.h>
> +#include <string.h>
> +
> +#define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
> +			sizeof(struct proc_input))
> +
> +/*
> + * Send PROC_CN_MCAST_NOTIFY type notification to the connector code in kernel.
> + * This will send the exit_code specified by user to the connector layer, so
> + * it can send a notification for that event to any listening process
> + */
> +int send_message(int nl_sock, unsigned int exit_code)
> +{
> +	char buff[NL_MESSAGE_SIZE];
> +	struct nlmsghdr *hdr;
> +	struct cn_msg *msg;
> +
> +	hdr = (struct nlmsghdr *)buff;
> +	hdr->nlmsg_len = NL_MESSAGE_SIZE;
> +	hdr->nlmsg_type = NLMSG_DONE;
> +	hdr->nlmsg_flags = 0;
> +	hdr->nlmsg_seq = 0;
> +	hdr->nlmsg_pid = getpid();
> +
> +	msg = (struct cn_msg *)NLMSG_DATA(hdr);
> +	msg->id.idx = CN_IDX_PROC;
> +	msg->id.val = CN_VAL_PROC;
> +	msg->seq = 0;
> +	msg->ack = 0;
> +	msg->flags = 0;
> +
> +	msg->len = sizeof(struct proc_input);
> +	((struct proc_input *)msg->data)->mcast_op =
> +		PROC_CN_MCAST_NOTIFY;
> +	((struct proc_input *)msg->data)->uexit_code = exit_code;
> +
> +	if (send(nl_sock, hdr, hdr->nlmsg_len, 0) == -1) {
> +		perror("send failed");
> +		return -errno;
> +	}
> +	return 0;
> +}
> +
> +int notify_netlink_thread_exit(unsigned int exit_code)
> +{
> +	struct sockaddr_nl sa_nl;
> +	int err = 0;
> +	int nl_sock;
> +
> +	nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
> +
> +	if (nl_sock == -1) {
> +		perror("socket failed");
> +		return -errno;
> +	}
> +
> +	bzero(&sa_nl, sizeof(sa_nl));
> +	sa_nl.nl_family = AF_NETLINK;
> +	sa_nl.nl_groups = CN_IDX_PROC;
> +	sa_nl.nl_pid    = gettid();
> +
> +	if (bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1) {
> +		perror("bind failed");
> +		close(nl_sock);
> +		return -errno;
> +	}
> +
> +	err = send_message(nl_sock, exit_code);
> +
> +	close(nl_sock);
> +
> +	if (err < 0)
> +		return err;
> +
> +	return 0;
> +}
> -- 
> 2.46.0
>
Anjali Kulkarni Oct. 15, 2024, 9:28 p.m. UTC | #2
> On Oct 15, 2024, at 12:27 PM, Liam Howlett <liam.howlett@oracle.com> wrote:
> 
> * Anjali Kulkarni <anjali.k.kulkarni@oracle.com> [241015 13:30]:
>> Test to check if setting PROC_CN_MCAST_NOTIFY in proc connector API, allows
>> a thread's non-zero exit status to be returned to proc_filter.
>> 
>> The threads.c program creates 2 child threads. 1st thread handles signal
>> SIGSEGV, and 2nd thread needs to indicate some error condition (value 1)
>> to the kernel, instead of using pthread_exit() with 1.
>> 
>> In both cases, child sends notify_netlink_thread_exit(exit_code) to kernel,
>> to let kernel know it has exited abnormally with exit_code.
>> 
>> Compile:
>>    make thread
>>    make proc_filter
>> To see non-zero exit notifications, run:
>>    ./proc_filter -f
>> Run threads code in another window:
>>    ./threads
>> Note the 2 child thread IDs reported above
>> Send SIGSEGV signal to the child handling SIGSEGV:
>>    kill -11 <child1-tid>
>> Watch the child 1 tid being notified with exit code 11 to proc_filter
>> Watch child 2 tid being notified with exit code 1 (value defined in code)
>> to proc_filter
>> 
>> Signed-off-by: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
>> ---
>> tools/testing/selftests/connector/Makefile    |  23 +++-
>> .../testing/selftests/connector/proc_filter.c |   5 +
>> tools/testing/selftests/connector/thread.c    | 116 ++++++++++++++++++
>> .../selftests/connector/thread_filter.c       |  96 +++++++++++++++
>> 4 files changed, 239 insertions(+), 1 deletion(-)
>> create mode 100644 tools/testing/selftests/connector/thread.c
>> create mode 100644 tools/testing/selftests/connector/thread_filter.c
>> 
>> diff --git a/tools/testing/selftests/connector/Makefile b/tools/testing/selftests/connector/Makefile
>> index 92188b9bac5c..bf335826bc3b 100644
>> --- a/tools/testing/selftests/connector/Makefile
>> +++ b/tools/testing/selftests/connector/Makefile
>> @@ -1,5 +1,26 @@
>> # SPDX-License-Identifier: GPL-2.0
>> -CFLAGS += -Wall $(KHDR_INCLUDES)
>> +KERNEL="../../../.."
>> +
>> +CFLAGS += -Wall $(KHDR_INCLUDES) -I $(KERNEL)/include/uapi -I $(KERNEL)/include
>> +
>> +proc_filter: proc_filter.o
>> + cc proc_filter.o -o proc_filter
>> +
>> +proc_filter.o: proc_filter.c
>> + cc -c proc_filter.c -o proc_filter.o $(CFLAGS)
>> +
>> +thread: thread.o thread_filter.o
>> + cc thread.o thread_filter.o -o thread
>> +
>> +thread.o: thread.c $(DEPS)
>> + cc -c thread.c -o thread.o $(CFLAGS)
>> +
>> +thread_filter.o: thread_filter.c
>> + cc -c thread_filter.c -o thread_filter.o $(CFLAGS)
>> +
>> +define EXTRA_CLEAN
>> + rm *.o thread
>> +endef
>> 
>> TEST_GEN_PROGS = proc_filter
>> 
>> diff --git a/tools/testing/selftests/connector/proc_filter.c b/tools/testing/selftests/connector/proc_filter.c
>> index 4a825b997666..6fb4842894f8 100644
>> --- a/tools/testing/selftests/connector/proc_filter.c
>> +++ b/tools/testing/selftests/connector/proc_filter.c
>> @@ -1,4 +1,9 @@
>> // SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
>> + *
>> + * Copyright (c) 2024 Oracle and/or its affiliates.
>> + */
>> 
>> #include <sys/types.h>
>> #include <sys/epoll.h>
>> diff --git a/tools/testing/selftests/connector/thread.c b/tools/testing/selftests/connector/thread.c
>> new file mode 100644
>> index 000000000000..8c9abf6115d6
>> --- /dev/null
>> +++ b/tools/testing/selftests/connector/thread.c
>> @@ -0,0 +1,116 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
>> + *
>> + * Copyright (c) 2024 Oracle and/or its affiliates.
>> + */
>> +
>> +#include <pthread.h>
>> +#include <stdio.h>
>> +#include <unistd.h>
>> +#include <stdlib.h>
>> +#include <signal.h>
>> +
>> +/*
>> + * This code tests a thread exit notification when thread exits abnormally.
>> + * Normally, when a thread exits abnormally, the kernel is not aware of the
>> + * exit code. This is usually only conveyed from child to parent via the
>> + * pthread_exit() and pthread_join() calls. Sometimes, however, a parent
>> + * process cannot monitor all child processes via pthread_join(), particularly
>> + * when there is a huge amount of child processes. In this case, the parent
>> + * has created the child with PTHREAD_CREATE_DETACHED attribute.
>> + * To fix this problem, either when child wants to convey non-zero exit via
>> + * pthread_exit() or in a signal handler, the child can notify the kernel's
>> + * connector module it's exit status via a netlink call with new type
>> + * PROC_CN_MCAST_NOTIFY. (Implemented in the thread_filter.c file).
>> + * This will send the exit code from the child to the kernel, which the kernel
>> + * can later return to proc_filter program when the child actually exits.
>> + * To test this usecase:
>> + * Compile:
>> + * make thread
>> + * make proc_filter
>> + * To see non-zero exit notifications, run:
>> + * ./proc_filter -f
>> + * Start the threads code, creating 2 threads, in another window:
>> + * ./threads
>> + * Note the 2 child thread IDs reported above
>> + * Send SIGSEGV signal to the child handling SIGSEGV:
>> + * kill -11 <child1-tid>
>> + * Watch the event being notified with exit code 11 to proc_filter
>> + * Watch child 2 tid being notified with exit code 1 (value defined in code)
>> + * to proc_filter
>> + */
>> +
>> +extern int notify_netlink_thread_exit(unsigned int exit_code);
>> +
>> +static void sigsegvh(int sig)
>> +{
>> + unsigned int exit_code = (unsigned int) sig;
>> + /*
>> +  * Send any non-zero value to get a notification. Here we are
>> +  * sending the signal number for SIGSEGV which is 11
>> +  */
>> + notify_netlink_thread_exit(exit_code);
>> +}
>> +
>> +void *threadc1(void *ptr)
>> +{
>> + signal(SIGSEGV, sigsegvh);
>> + printf("Child 1 thread id %d, handling SIGSEGV\n", gettid());
>> + sleep(20);
>> + pthread_exit(NULL);
>> +}
>> +
>> +void *threadc2(void *ptr)
>> +{
>> + int exit_val = 1;
>> +
>> + printf("Child 2 thread id %d, wants to exit with value %d\n",
>> + gettid(), exit_val);
>> + sleep(2);
>> + notify_netlink_thread_exit(exit_val);
>> + pthread_exit(NULL);
>> +}
>> +
>> +int main(int argc, char **argv)
>> +{
>> + pthread_t thread1, thread2;
>> + pthread_attr_t attr1, attr2;
>> + int ret;
>> +
>> + ret = pthread_attr_init(&attr1);
>> + if (ret != 0) {
>> + perror("pthread_attr_init failed");
>> + exit(ret);
>> + }
>> + ret = pthread_attr_setdetachstate(&attr1, PTHREAD_CREATE_DETACHED);
>> + if (ret != 0) {
>> + perror("pthread_attr_setdetachstate failed");
>> + exit(ret);
>> + }
>> + ret = pthread_create(&thread1, &attr1, *threadc1, NULL);
>> + if (ret != 0) {
>> + perror("pthread_create failed");
>> + exit(ret);
>> + }
>> +
>> + ret = pthread_attr_init(&attr2);
>> + if (ret != 0) {
>> + perror("pthread_attr_init failed");
>> + exit(ret);
>> + }
>> + ret = pthread_attr_setdetachstate(&attr2, PTHREAD_CREATE_DETACHED);
>> + if (ret != 0) {
>> + perror("pthread_attr_setdetachstate failed");
>> + exit(ret);
>> + }
>> + ret = pthread_create(&thread2, &attr2, *threadc2, NULL);
>> + if (ret != 0) {
>> + perror("pthread_create failed");
>> + exit(ret);
>> + }
> 
> I expected the test to check for the correct value to be returned?
> Could you use pthread_join() and verify the same value is returned
> through the new mechanism, or will they not match?

This is a manual test; you need to check visually that proc_filter
is returning the correct values for the 2 cases. I can make this an
automated test, reading the output of proc_filter directly from a file
and checking the values are as expected.
There is also another test program which does an automated test
of the values returned by proc_filter for scale (100k or so), which I
will send out later. (The co-author of that patch, who wrote the
original code for processes, is away - @peili.io@oracle.com)
pthread_join() cannot return a value to the kernel, so we cannot
use it to verify the new mechanism. 

> 
>> +
>> + /* Wait for children to exit or be killed */
>> + sleep(30);
>> + exit(0);
>> +}
>> diff --git a/tools/testing/selftests/connector/thread_filter.c b/tools/testing/selftests/connector/thread_filter.c
>> new file mode 100644
>> index 000000000000..3da740aa7537
>> --- /dev/null
>> +++ b/tools/testing/selftests/connector/thread_filter.c
>> @@ -0,0 +1,96 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
>> + *
>> + * Copyright (c) 2024 Oracle and/or its affiliates.
>> + */
>> +
>> +#include <sys/types.h>
>> +#include <sys/epoll.h>
>> +#include <sys/socket.h>
>> +#include <linux/netlink.h>
>> +#include <linux/connector.h>
>> +#include <linux/cn_proc.h>
>> +
>> +#include <stddef.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <unistd.h>
>> +#include <strings.h>
>> +#include <errno.h>
>> +#include <signal.h>
>> +#include <string.h>
>> +
>> +#define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
>> + sizeof(struct proc_input))
>> +
>> +/*
>> + * Send PROC_CN_MCAST_NOTIFY type notification to the connector code in kernel.
>> + * This will send the exit_code specified by user to the connector layer, so
>> + * it can send a notification for that event to any listening process
>> + */
>> +int send_message(int nl_sock, unsigned int exit_code)
>> +{
>> + char buff[NL_MESSAGE_SIZE];
>> + struct nlmsghdr *hdr;
>> + struct cn_msg *msg;
>> +
>> + hdr = (struct nlmsghdr *)buff;
>> + hdr->nlmsg_len = NL_MESSAGE_SIZE;
>> + hdr->nlmsg_type = NLMSG_DONE;
>> + hdr->nlmsg_flags = 0;
>> + hdr->nlmsg_seq = 0;
>> + hdr->nlmsg_pid = getpid();
>> +
>> + msg = (struct cn_msg *)NLMSG_DATA(hdr);
>> + msg->id.idx = CN_IDX_PROC;
>> + msg->id.val = CN_VAL_PROC;
>> + msg->seq = 0;
>> + msg->ack = 0;
>> + msg->flags = 0;
>> +
>> + msg->len = sizeof(struct proc_input);
>> + ((struct proc_input *)msg->data)->mcast_op =
>> + PROC_CN_MCAST_NOTIFY;
>> + ((struct proc_input *)msg->data)->uexit_code = exit_code;
>> +
>> + if (send(nl_sock, hdr, hdr->nlmsg_len, 0) == -1) {
>> + perror("send failed");
>> + return -errno;
>> + }
>> + return 0;
>> +}
>> +
>> +int notify_netlink_thread_exit(unsigned int exit_code)
>> +{
>> + struct sockaddr_nl sa_nl;
>> + int err = 0;
>> + int nl_sock;
>> +
>> + nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
>> +
>> + if (nl_sock == -1) {
>> + perror("socket failed");
>> + return -errno;
>> + }
>> +
>> + bzero(&sa_nl, sizeof(sa_nl));
>> + sa_nl.nl_family = AF_NETLINK;
>> + sa_nl.nl_groups = CN_IDX_PROC;
>> + sa_nl.nl_pid    = gettid();
>> +
>> + if (bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1) {
>> + perror("bind failed");
>> + close(nl_sock);
>> + return -errno;
>> + }
>> +
>> + err = send_message(nl_sock, exit_code);
>> +
>> + close(nl_sock);
>> +
>> + if (err < 0)
>> + return err;
>> +
>> + return 0;
>> +}
>> -- 
>> 2.46.0
Liam R. Howlett Oct. 16, 2024, 12:32 a.m. UTC | #3
* Anjali Kulkarni <anjali.k.kulkarni@oracle.com> [241015 17:28]:
> 
> 
> > On Oct 15, 2024, at 12:27 PM, Liam Howlett <liam.howlett@oracle.com> wrote:
> > 
> > * Anjali Kulkarni <anjali.k.kulkarni@oracle.com> [241015 13:30]:
> >> Test to check if setting PROC_CN_MCAST_NOTIFY in proc connector API, allows
> >> a thread's non-zero exit status to be returned to proc_filter.
> >> 
> >> The threads.c program creates 2 child threads. 1st thread handles signal
> >> SIGSEGV, and 2nd thread needs to indicate some error condition (value 1)
> >> to the kernel, instead of using pthread_exit() with 1.
> >> 
> >> In both cases, child sends notify_netlink_thread_exit(exit_code) to kernel,
> >> to let kernel know it has exited abnormally with exit_code.
> >> 
> >> Compile:
> >>    make thread
> >>    make proc_filter
> >> To see non-zero exit notifications, run:
> >>    ./proc_filter -f
> >> Run threads code in another window:
> >>    ./threads
> >> Note the 2 child thread IDs reported above
> >> Send SIGSEGV signal to the child handling SIGSEGV:
> >>    kill -11 <child1-tid>
> >> Watch the child 1 tid being notified with exit code 11 to proc_filter
> >> Watch child 2 tid being notified with exit code 1 (value defined in code)
> >> to proc_filter
> >> 
> >> Signed-off-by: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
> >> ---
> >> tools/testing/selftests/connector/Makefile    |  23 +++-
> >> .../testing/selftests/connector/proc_filter.c |   5 +
> >> tools/testing/selftests/connector/thread.c    | 116 ++++++++++++++++++
> >> .../selftests/connector/thread_filter.c       |  96 +++++++++++++++
> >> 4 files changed, 239 insertions(+), 1 deletion(-)
> >> create mode 100644 tools/testing/selftests/connector/thread.c
> >> create mode 100644 tools/testing/selftests/connector/thread_filter.c
> >> 
> >> diff --git a/tools/testing/selftests/connector/Makefile b/tools/testing/selftests/connector/Makefile
> >> index 92188b9bac5c..bf335826bc3b 100644
> >> --- a/tools/testing/selftests/connector/Makefile
> >> +++ b/tools/testing/selftests/connector/Makefile
> >> @@ -1,5 +1,26 @@
> >> # SPDX-License-Identifier: GPL-2.0
> >> -CFLAGS += -Wall $(KHDR_INCLUDES)
> >> +KERNEL="../../../.."
> >> +
> >> +CFLAGS += -Wall $(KHDR_INCLUDES) -I $(KERNEL)/include/uapi -I $(KERNEL)/include
> >> +
> >> +proc_filter: proc_filter.o
> >> + cc proc_filter.o -o proc_filter
> >> +
> >> +proc_filter.o: proc_filter.c
> >> + cc -c proc_filter.c -o proc_filter.o $(CFLAGS)
> >> +
> >> +thread: thread.o thread_filter.o
> >> + cc thread.o thread_filter.o -o thread
> >> +
> >> +thread.o: thread.c $(DEPS)
> >> + cc -c thread.c -o thread.o $(CFLAGS)
> >> +
> >> +thread_filter.o: thread_filter.c
> >> + cc -c thread_filter.c -o thread_filter.o $(CFLAGS)
> >> +
> >> +define EXTRA_CLEAN
> >> + rm *.o thread
> >> +endef
> >> 
> >> TEST_GEN_PROGS = proc_filter
> >> 
> >> diff --git a/tools/testing/selftests/connector/proc_filter.c b/tools/testing/selftests/connector/proc_filter.c
> >> index 4a825b997666..6fb4842894f8 100644
> >> --- a/tools/testing/selftests/connector/proc_filter.c
> >> +++ b/tools/testing/selftests/connector/proc_filter.c
> >> @@ -1,4 +1,9 @@
> >> // SPDX-License-Identifier: GPL-2.0-only
> >> +/*
> >> + * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
> >> + *
> >> + * Copyright (c) 2024 Oracle and/or its affiliates.
> >> + */
> >> 
> >> #include <sys/types.h>
> >> #include <sys/epoll.h>
> >> diff --git a/tools/testing/selftests/connector/thread.c b/tools/testing/selftests/connector/thread.c
> >> new file mode 100644
> >> index 000000000000..8c9abf6115d6
> >> --- /dev/null
> >> +++ b/tools/testing/selftests/connector/thread.c
> >> @@ -0,0 +1,116 @@
> >> +// SPDX-License-Identifier: GPL-2.0-only
> >> +/*
> >> + * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
> >> + *
> >> + * Copyright (c) 2024 Oracle and/or its affiliates.
> >> + */
> >> +
> >> +#include <pthread.h>
> >> +#include <stdio.h>
> >> +#include <unistd.h>
> >> +#include <stdlib.h>
> >> +#include <signal.h>
> >> +
> >> +/*
> >> + * This code tests a thread exit notification when thread exits abnormally.
> >> + * Normally, when a thread exits abnormally, the kernel is not aware of the
> >> + * exit code. This is usually only conveyed from child to parent via the
> >> + * pthread_exit() and pthread_join() calls. Sometimes, however, a parent
> >> + * process cannot monitor all child processes via pthread_join(), particularly
> >> + * when there is a huge amount of child processes. In this case, the parent
> >> + * has created the child with PTHREAD_CREATE_DETACHED attribute.
> >> + * To fix this problem, either when child wants to convey non-zero exit via
> >> + * pthread_exit() or in a signal handler, the child can notify the kernel's
> >> + * connector module it's exit status via a netlink call with new type
> >> + * PROC_CN_MCAST_NOTIFY. (Implemented in the thread_filter.c file).
> >> + * This will send the exit code from the child to the kernel, which the kernel
> >> + * can later return to proc_filter program when the child actually exits.
> >> + * To test this usecase:
> >> + * Compile:
> >> + * make thread
> >> + * make proc_filter
> >> + * To see non-zero exit notifications, run:
> >> + * ./proc_filter -f
> >> + * Start the threads code, creating 2 threads, in another window:
> >> + * ./threads
> >> + * Note the 2 child thread IDs reported above
> >> + * Send SIGSEGV signal to the child handling SIGSEGV:
> >> + * kill -11 <child1-tid>
> >> + * Watch the event being notified with exit code 11 to proc_filter
> >> + * Watch child 2 tid being notified with exit code 1 (value defined in code)
> >> + * to proc_filter
> >> + */
> >> +
> >> +extern int notify_netlink_thread_exit(unsigned int exit_code);
> >> +
> >> +static void sigsegvh(int sig)
> >> +{
> >> + unsigned int exit_code = (unsigned int) sig;
> >> + /*
> >> +  * Send any non-zero value to get a notification. Here we are
> >> +  * sending the signal number for SIGSEGV which is 11
> >> +  */
> >> + notify_netlink_thread_exit(exit_code);
> >> +}
> >> +
> >> +void *threadc1(void *ptr)
> >> +{
> >> + signal(SIGSEGV, sigsegvh);
> >> + printf("Child 1 thread id %d, handling SIGSEGV\n", gettid());
> >> + sleep(20);
> >> + pthread_exit(NULL);
> >> +}
> >> +
> >> +void *threadc2(void *ptr)
> >> +{
> >> + int exit_val = 1;
> >> +
> >> + printf("Child 2 thread id %d, wants to exit with value %d\n",
> >> + gettid(), exit_val);
> >> + sleep(2);
> >> + notify_netlink_thread_exit(exit_val);
> >> + pthread_exit(NULL);
> >> +}
> >> +
> >> +int main(int argc, char **argv)
> >> +{
> >> + pthread_t thread1, thread2;
> >> + pthread_attr_t attr1, attr2;
> >> + int ret;
> >> +
> >> + ret = pthread_attr_init(&attr1);
> >> + if (ret != 0) {
> >> + perror("pthread_attr_init failed");
> >> + exit(ret);
> >> + }
> >> + ret = pthread_attr_setdetachstate(&attr1, PTHREAD_CREATE_DETACHED);
> >> + if (ret != 0) {
> >> + perror("pthread_attr_setdetachstate failed");
> >> + exit(ret);
> >> + }
> >> + ret = pthread_create(&thread1, &attr1, *threadc1, NULL);
> >> + if (ret != 0) {
> >> + perror("pthread_create failed");
> >> + exit(ret);
> >> + }
> >> +
> >> + ret = pthread_attr_init(&attr2);
> >> + if (ret != 0) {
> >> + perror("pthread_attr_init failed");
> >> + exit(ret);
> >> + }
> >> + ret = pthread_attr_setdetachstate(&attr2, PTHREAD_CREATE_DETACHED);
> >> + if (ret != 0) {
> >> + perror("pthread_attr_setdetachstate failed");
> >> + exit(ret);
> >> + }
> >> + ret = pthread_create(&thread2, &attr2, *threadc2, NULL);
> >> + if (ret != 0) {
> >> + perror("pthread_create failed");
> >> + exit(ret);
> >> + }
> > 
> > I expected the test to check for the correct value to be returned?
> > Could you use pthread_join() and verify the same value is returned
> > through the new mechanism, or will they not match?
> 
> This is a manual test, you need to check visually that the proc_filter
> Is returning correct values for the 2 cases. I can make this an
> automated test, reading the output of proc_filter directly from a file
> and checking the values are as expected.
> There is also another test program which does an automated test
> of the values returned by proc_filter for scale (100k or so), which I
> will send out later. (The co-author of that patch, who wrote the
> original code for processes, is away - @peili.io@oracle.com)
> pthread_join() cannot return a value to the kernel, so we cannot
> use it to verify the new mechanism. 


Okay, as long as there is an automatic test then it's probably fine.

> 
> > 
> >> +
> >> + /* Wait for children to exit or be killed */
> >> + sleep(30);
> >> + exit(0);
> >> +}
> >> diff --git a/tools/testing/selftests/connector/thread_filter.c b/tools/testing/selftests/connector/thread_filter.c
> >> new file mode 100644
> >> index 000000000000..3da740aa7537
> >> --- /dev/null
> >> +++ b/tools/testing/selftests/connector/thread_filter.c
> >> @@ -0,0 +1,96 @@
> >> +// SPDX-License-Identifier: GPL-2.0-only
> >> +/*
> >> + * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
> >> + *
> >> + * Copyright (c) 2024 Oracle and/or its affiliates.
> >> + */
> >> +
> >> +#include <sys/types.h>
> >> +#include <sys/epoll.h>
> >> +#include <sys/socket.h>
> >> +#include <linux/netlink.h>
> >> +#include <linux/connector.h>
> >> +#include <linux/cn_proc.h>
> >> +
> >> +#include <stddef.h>
> >> +#include <stdio.h>
> >> +#include <stdlib.h>
> >> +#include <unistd.h>
> >> +#include <strings.h>
> >> +#include <errno.h>
> >> +#include <signal.h>
> >> +#include <string.h>
> >> +
> >> +#define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
> >> + sizeof(struct proc_input))
> >> +
> >> +/*
> >> + * Send PROC_CN_MCAST_NOTIFY type notification to the connector code in kernel.
> >> + * This will send the exit_code specified by user to the connector layer, so
> >> + * it can send a notification for that event to any listening process
> >> + */
> >> +int send_message(int nl_sock, unsigned int exit_code)
> >> +{
> >> + char buff[NL_MESSAGE_SIZE];
> >> + struct nlmsghdr *hdr;
> >> + struct cn_msg *msg;
> >> +
> >> + hdr = (struct nlmsghdr *)buff;
> >> + hdr->nlmsg_len = NL_MESSAGE_SIZE;
> >> + hdr->nlmsg_type = NLMSG_DONE;
> >> + hdr->nlmsg_flags = 0;
> >> + hdr->nlmsg_seq = 0;
> >> + hdr->nlmsg_pid = getpid();
> >> +
> >> + msg = (struct cn_msg *)NLMSG_DATA(hdr);
> >> + msg->id.idx = CN_IDX_PROC;
> >> + msg->id.val = CN_VAL_PROC;
> >> + msg->seq = 0;
> >> + msg->ack = 0;
> >> + msg->flags = 0;
> >> +
> >> + msg->len = sizeof(struct proc_input);
> >> + ((struct proc_input *)msg->data)->mcast_op =
> >> + PROC_CN_MCAST_NOTIFY;
> >> + ((struct proc_input *)msg->data)->uexit_code = exit_code;
> >> +
> >> + if (send(nl_sock, hdr, hdr->nlmsg_len, 0) == -1) {
> >> + perror("send failed");
> >> + return -errno;
> >> + }
> >> + return 0;
> >> +}
> >> +
> >> +int notify_netlink_thread_exit(unsigned int exit_code)
> >> +{
> >> + struct sockaddr_nl sa_nl;
> >> + int err = 0;
> >> + int nl_sock;
> >> +
> >> + nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
> >> +
> >> + if (nl_sock == -1) {
> >> + perror("socket failed");
> >> + return -errno;
> >> + }
> >> +
> >> + bzero(&sa_nl, sizeof(sa_nl));
> >> + sa_nl.nl_family = AF_NETLINK;
> >> + sa_nl.nl_groups = CN_IDX_PROC;
> >> + sa_nl.nl_pid    = gettid();
> >> +
> >> + if (bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1) {
> >> + perror("bind failed");
> >> + close(nl_sock);
> >> + return -errno;
> >> + }
> >> +
> >> + err = send_message(nl_sock, exit_code);
> >> +
> >> + close(nl_sock);
> >> +
> >> + if (err < 0)
> >> + return err;
> >> +
> >> + return 0;
> >> +}
> >> -- 
> >> 2.46.0
> 
>
Anjali Kulkarni Oct. 16, 2024, 5:47 p.m. UTC | #4
> On Oct 15, 2024, at 5:32 PM, Liam Howlett <liam.howlett@oracle.com> wrote:
> 
> * Anjali Kulkarni <anjali.k.kulkarni@oracle.com> [241015 17:28]:
>> 
>> 
>>> On Oct 15, 2024, at 12:27 PM, Liam Howlett <liam.howlett@oracle.com> wrote:
>>> 
>>> * Anjali Kulkarni <anjali.k.kulkarni@oracle.com> [241015 13:30]:
>>>> Test to check if setting PROC_CN_MCAST_NOTIFY in proc connector API, allows
>>>> a thread's non-zero exit status to be returned to proc_filter.
>>>> 
>>>> The threads.c program creates 2 child threads. 1st thread handles signal
>>>> SIGSEGV, and 2nd thread needs to indicate some error condition (value 1)
>>>> to the kernel, instead of using pthread_exit() with 1.
>>>> 
>>>> In both cases, child sends notify_netlink_thread_exit(exit_code) to kernel,
>>>> to let kernel know it has exited abnormally with exit_code.
>>>> 
>>>> Compile:
>>>>   make thread
>>>>   make proc_filter
>>>> To see non-zero exit notifications, run:
>>>>   ./proc_filter -f
>>>> Run threads code in another window:
>>>>   ./threads
>>>> Note the 2 child thread IDs reported above
>>>> Send SIGSEGV signal to the child handling SIGSEGV:
>>>>   kill -11 <child1-tid>
>>>> Watch the child 1 tid being notified with exit code 11 to proc_filter
>>>> Watch child 2 tid being notified with exit code 1 (value defined in code)
>>>> to proc_filter
>>>> 
>>>> Signed-off-by: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
>>>> ---
>>>> tools/testing/selftests/connector/Makefile    |  23 +++-
>>>> .../testing/selftests/connector/proc_filter.c |   5 +
>>>> tools/testing/selftests/connector/thread.c    | 116 ++++++++++++++++++
>>>> .../selftests/connector/thread_filter.c       |  96 +++++++++++++++
>>>> 4 files changed, 239 insertions(+), 1 deletion(-)
>>>> create mode 100644 tools/testing/selftests/connector/thread.c
>>>> create mode 100644 tools/testing/selftests/connector/thread_filter.c
>>>> 
>>>> diff --git a/tools/testing/selftests/connector/Makefile b/tools/testing/selftests/connector/Makefile
>>>> index 92188b9bac5c..bf335826bc3b 100644
>>>> --- a/tools/testing/selftests/connector/Makefile
>>>> +++ b/tools/testing/selftests/connector/Makefile
>>>> @@ -1,5 +1,26 @@
>>>> # SPDX-License-Identifier: GPL-2.0
>>>> -CFLAGS += -Wall $(KHDR_INCLUDES)
>>>> +KERNEL="../../../.."
>>>> +
>>>> +CFLAGS += -Wall $(KHDR_INCLUDES) -I $(KERNEL)/include/uapi -I $(KERNEL)/include
>>>> +
>>>> +proc_filter: proc_filter.o
>>>> + cc proc_filter.o -o proc_filter
>>>> +
>>>> +proc_filter.o: proc_filter.c
>>>> + cc -c proc_filter.c -o proc_filter.o $(CFLAGS)
>>>> +
>>>> +thread: thread.o thread_filter.o
>>>> + cc thread.o thread_filter.o -o thread
>>>> +
>>>> +thread.o: thread.c $(DEPS)
>>>> + cc -c thread.c -o thread.o $(CFLAGS)
>>>> +
>>>> +thread_filter.o: thread_filter.c
>>>> + cc -c thread_filter.c -o thread_filter.o $(CFLAGS)
>>>> +
>>>> +define EXTRA_CLEAN
>>>> + rm *.o thread
>>>> +endef
>>>> 
>>>> TEST_GEN_PROGS = proc_filter
>>>> 
>>>> diff --git a/tools/testing/selftests/connector/proc_filter.c b/tools/testing/selftests/connector/proc_filter.c
>>>> index 4a825b997666..6fb4842894f8 100644
>>>> --- a/tools/testing/selftests/connector/proc_filter.c
>>>> +++ b/tools/testing/selftests/connector/proc_filter.c
>>>> @@ -1,4 +1,9 @@
>>>> // SPDX-License-Identifier: GPL-2.0-only
>>>> +/*
>>>> + * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
>>>> + *
>>>> + * Copyright (c) 2024 Oracle and/or its affiliates.
>>>> + */
>>>> 
>>>> #include <sys/types.h>
>>>> #include <sys/epoll.h>
>>>> diff --git a/tools/testing/selftests/connector/thread.c b/tools/testing/selftests/connector/thread.c
>>>> new file mode 100644
>>>> index 000000000000..8c9abf6115d6
>>>> --- /dev/null
>>>> +++ b/tools/testing/selftests/connector/thread.c
>>>> @@ -0,0 +1,116 @@
>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>> +/*
>>>> + * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
>>>> + *
>>>> + * Copyright (c) 2024 Oracle and/or its affiliates.
>>>> + */
>>>> +
>>>> +#include <pthread.h>
>>>> +#include <stdio.h>
>>>> +#include <unistd.h>
>>>> +#include <stdlib.h>
>>>> +#include <signal.h>
>>>> +
>>>> +/*
>>>> + * This code tests a thread exit notification when thread exits abnormally.
>>>> + * Normally, when a thread exits abnormally, the kernel is not aware of the
>>>> + * exit code. This is usually only conveyed from child to parent via the
>>>> + * pthread_exit() and pthread_join() calls. Sometimes, however, a parent
>>>> + * process cannot monitor all child processes via pthread_join(), particularly
>>>> + * when there is a huge amount of child processes. In this case, the parent
>>>> + * has created the child with PTHREAD_CREATE_DETACHED attribute.
>>>> + * To fix this problem, either when child wants to convey non-zero exit via
>>>> + * pthread_exit() or in a signal handler, the child can notify the kernel's
>>>> + * connector module it's exit status via a netlink call with new type
>>>> + * PROC_CN_MCAST_NOTIFY. (Implemented in the thread_filter.c file).
>>>> + * This will send the exit code from the child to the kernel, which the kernel
>>>> + * can later return to proc_filter program when the child actually exits.
>>>> + * To test this usecase:
>>>> + * Compile:
>>>> + * make thread
>>>> + * make proc_filter
>>>> + * To see non-zero exit notifications, run:
>>>> + * ./proc_filter -f
>>>> + * Start the threads code, creating 2 threads, in another window:
>>>> + * ./threads
>>>> + * Note the 2 child thread IDs reported above
>>>> + * Send SIGSEGV signal to the child handling SIGSEGV:
>>>> + * kill -11 <child1-tid>
>>>> + * Watch the event being notified with exit code 11 to proc_filter
>>>> + * Watch child 2 tid being notified with exit code 1 (value defined in code)
>>>> + * to proc_filter
>>>> + */
>>>> +
>>>> +extern int notify_netlink_thread_exit(unsigned int exit_code);
>>>> +
>>>> +static void sigsegvh(int sig)
>>>> +{
>>>> + unsigned int exit_code = (unsigned int) sig;
>>>> + /*
>>>> +  * Send any non-zero value to get a notification. Here we are
>>>> +  * sending the signal number for SIGSEGV which is 11
>>>> +  */
>>>> + notify_netlink_thread_exit(exit_code);
>>>> +}
>>>> +
>>>> +void *threadc1(void *ptr)
>>>> +{
>>>> + signal(SIGSEGV, sigsegvh);
>>>> + printf("Child 1 thread id %d, handling SIGSEGV\n", gettid());
>>>> + sleep(20);
>>>> + pthread_exit(NULL);
>>>> +}
>>>> +
>>>> +void *threadc2(void *ptr)
>>>> +{
>>>> + int exit_val = 1;
>>>> +
>>>> + printf("Child 2 thread id %d, wants to exit with value %d\n",
>>>> + gettid(), exit_val);
>>>> + sleep(2);
>>>> + notify_netlink_thread_exit(exit_val);
>>>> + pthread_exit(NULL);
>>>> +}
>>>> +
>>>> +int main(int argc, char **argv)
>>>> +{
>>>> + pthread_t thread1, thread2;
>>>> + pthread_attr_t attr1, attr2;
>>>> + int ret;
>>>> +
>>>> + ret = pthread_attr_init(&attr1);
>>>> + if (ret != 0) {
>>>> + perror("pthread_attr_init failed");
>>>> + exit(ret);
>>>> + }
>>>> + ret = pthread_attr_setdetachstate(&attr1, PTHREAD_CREATE_DETACHED);
>>>> + if (ret != 0) {
>>>> + perror("pthread_attr_setdetachstate failed");
>>>> + exit(ret);
>>>> + }
>>>> + ret = pthread_create(&thread1, &attr1, *threadc1, NULL);
>>>> + if (ret != 0) {
>>>> + perror("pthread_create failed");
>>>> + exit(ret);
>>>> + }
>>>> +
>>>> + ret = pthread_attr_init(&attr2);
>>>> + if (ret != 0) {
>>>> + perror("pthread_attr_init failed");
>>>> + exit(ret);
>>>> + }
>>>> + ret = pthread_attr_setdetachstate(&attr2, PTHREAD_CREATE_DETACHED);
>>>> + if (ret != 0) {
>>>> + perror("pthread_attr_setdetachstate failed");
>>>> + exit(ret);
>>>> + }
>>>> + ret = pthread_create(&thread2, &attr2, *threadc2, NULL);
>>>> + if (ret != 0) {
>>>> + perror("pthread_create failed");
>>>> + exit(ret);
>>>> + }
>>> 
>>> I expected the test to check for the correct value to be returned?
>>> Could you use pthread_join() and verify the same value is returned
>>> through the new mechanism, or will they not match?
>> 
>> This is a manual test, you need to check visually that the proc_filter
>> Is returning correct values for the 2 cases. I can make this an
>> automated test, reading the output of proc_filter directly from a file
>> and checking the values are as expected.
>> There is also another test program which does an automated test
>> of the values returned by proc_filter for scale (100k or so), which I
>> will send out later. (The co-author of that patch, who wrote the
>> original code for processes, is away - @peili.io@oracle.com)
>> pthread_join() cannot return a value to the kernel, so we cannot
>> use it to verify the new mechanism. 
> 
> 
> Okay, as long as there is an automatic test then it's probably fine.

Yes, I have made the current test automatic. In addition, I will submit the
scaling test, which is also automatic, for review later.

> 
>> 
>>> 
>>>> +
>>>> + /* Wait for children to exit or be killed */
>>>> + sleep(30);
>>>> + exit(0);
>>>> +}
>>>> diff --git a/tools/testing/selftests/connector/thread_filter.c b/tools/testing/selftests/connector/thread_filter.c
>>>> new file mode 100644
>>>> index 000000000000..3da740aa7537
>>>> --- /dev/null
>>>> +++ b/tools/testing/selftests/connector/thread_filter.c
>>>> @@ -0,0 +1,96 @@
>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>> +/*
>>>> + * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
>>>> + *
>>>> + * Copyright (c) 2024 Oracle and/or its affiliates.
>>>> + */
>>>> +
>>>> +#include <sys/types.h>
>>>> +#include <sys/epoll.h>
>>>> +#include <sys/socket.h>
>>>> +#include <linux/netlink.h>
>>>> +#include <linux/connector.h>
>>>> +#include <linux/cn_proc.h>
>>>> +
>>>> +#include <stddef.h>
>>>> +#include <stdio.h>
>>>> +#include <stdlib.h>
>>>> +#include <unistd.h>
>>>> +#include <strings.h>
>>>> +#include <errno.h>
>>>> +#include <signal.h>
>>>> +#include <string.h>
>>>> +
>>>> +#define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
>>>> + sizeof(struct proc_input))
>>>> +
>>>> +/*
>>>> + * Send PROC_CN_MCAST_NOTIFY type notification to the connector code in kernel.
>>>> + * This will send the exit_code specified by user to the connector layer, so
>>>> + * it can send a notification for that event to any listening process
>>>> + */
>>>> +int send_message(int nl_sock, unsigned int exit_code)
>>>> +{
>>>> + char buff[NL_MESSAGE_SIZE];
>>>> + struct nlmsghdr *hdr;
>>>> + struct cn_msg *msg;
>>>> +
>>>> + hdr = (struct nlmsghdr *)buff;
>>>> + hdr->nlmsg_len = NL_MESSAGE_SIZE;
>>>> + hdr->nlmsg_type = NLMSG_DONE;
>>>> + hdr->nlmsg_flags = 0;
>>>> + hdr->nlmsg_seq = 0;
>>>> + hdr->nlmsg_pid = getpid();
>>>> +
>>>> + msg = (struct cn_msg *)NLMSG_DATA(hdr);
>>>> + msg->id.idx = CN_IDX_PROC;
>>>> + msg->id.val = CN_VAL_PROC;
>>>> + msg->seq = 0;
>>>> + msg->ack = 0;
>>>> + msg->flags = 0;
>>>> +
>>>> + msg->len = sizeof(struct proc_input);
>>>> + ((struct proc_input *)msg->data)->mcast_op =
>>>> + PROC_CN_MCAST_NOTIFY;
>>>> + ((struct proc_input *)msg->data)->uexit_code = exit_code;
>>>> +
>>>> + if (send(nl_sock, hdr, hdr->nlmsg_len, 0) == -1) {
>>>> + perror("send failed");
>>>> + return -errno;
>>>> + }
>>>> + return 0;
>>>> +}
>>>> +
>>>> +int notify_netlink_thread_exit(unsigned int exit_code)
>>>> +{
>>>> + struct sockaddr_nl sa_nl;
>>>> + int err = 0;
>>>> + int nl_sock;
>>>> +
>>>> + nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
>>>> +
>>>> + if (nl_sock == -1) {
>>>> + perror("socket failed");
>>>> + return -errno;
>>>> + }
>>>> +
>>>> + bzero(&sa_nl, sizeof(sa_nl));
>>>> + sa_nl.nl_family = AF_NETLINK;
>>>> + sa_nl.nl_groups = CN_IDX_PROC;
>>>> + sa_nl.nl_pid    = gettid();
>>>> +
>>>> + if (bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1) {
>>>> + perror("bind failed");
>>>> + close(nl_sock);
>>>> + return -errno;
>>>> + }
>>>> +
>>>> + err = send_message(nl_sock, exit_code);
>>>> +
>>>> + close(nl_sock);
>>>> +
>>>> + if (err < 0)
>>>> + return err;
>>>> +
>>>> + return 0;
>>>> +}
>>>> -- 
>>>> 2.46.0
diff mbox series

Patch

diff --git a/tools/testing/selftests/connector/Makefile b/tools/testing/selftests/connector/Makefile
index 92188b9bac5c..bf335826bc3b 100644
--- a/tools/testing/selftests/connector/Makefile
+++ b/tools/testing/selftests/connector/Makefile
@@ -1,5 +1,26 @@ 
 # SPDX-License-Identifier: GPL-2.0
-CFLAGS += -Wall $(KHDR_INCLUDES)
+KERNEL="../../../.."
+
+CFLAGS += -Wall $(KHDR_INCLUDES) -I $(KERNEL)/include/uapi -I $(KERNEL)/include
+
+proc_filter: proc_filter.o
+	cc proc_filter.o -o proc_filter
+
+proc_filter.o: proc_filter.c
+	cc -c proc_filter.c -o proc_filter.o $(CFLAGS)
+
+thread: thread.o thread_filter.o
+	cc thread.o thread_filter.o -o thread
+
+thread.o: thread.c $(DEPS)
+		cc -c thread.c -o thread.o $(CFLAGS)
+
+thread_filter.o: thread_filter.c
+		cc -c thread_filter.c -o thread_filter.o $(CFLAGS)
+
+define EXTRA_CLEAN
+	rm *.o thread
+endef
 
 TEST_GEN_PROGS = proc_filter
 
diff --git a/tools/testing/selftests/connector/proc_filter.c b/tools/testing/selftests/connector/proc_filter.c
index 4a825b997666..6fb4842894f8 100644
--- a/tools/testing/selftests/connector/proc_filter.c
+++ b/tools/testing/selftests/connector/proc_filter.c
@@ -1,4 +1,9 @@ 
 // SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
+ *
+ * Copyright (c) 2024 Oracle and/or its affiliates.
+ */
 
 #include <sys/types.h>
 #include <sys/epoll.h>
diff --git a/tools/testing/selftests/connector/thread.c b/tools/testing/selftests/connector/thread.c
new file mode 100644
index 000000000000..8c9abf6115d6
--- /dev/null
+++ b/tools/testing/selftests/connector/thread.c
@@ -0,0 +1,116 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
+ *
+ * Copyright (c) 2024 Oracle and/or its affiliates.
+ */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+
+/*
+ * This code tests a thread exit notification when thread exits abnormally.
+ * Normally, when a thread exits abnormally, the kernel is not aware of the
+ * exit code. This is usually only conveyed from child to parent via the
+ * pthread_exit() and pthread_join() calls. Sometimes, however, a parent
+ * process cannot monitor all child threads via pthread_join(), particularly
+ * when there is a huge number of child threads. In this case, the parent
+ * has created the child with the PTHREAD_CREATE_DETACHED attribute.
+ * To fix this problem, either when child wants to convey non-zero exit via
+ * pthread_exit() or in a signal handler, the child can notify the kernel's
+ * connector module of its exit status via a netlink call with new type
+ * PROC_CN_MCAST_NOTIFY. (Implemented in the thread_filter.c file).
+ * This will send the exit code from the child to the kernel, which the kernel
+ * can later return to proc_filter program when the child actually exits.
+ * To test this usecase:
+ * Compile:
+ *	make thread
+ *	make proc_filter
+ * To see non-zero exit notifications, run:
+ *	./proc_filter -f
+ * Start the threads code, creating 2 threads, in another window:
+ *	./thread
+ * Note the 2 child thread IDs reported above
+ * Send SIGSEGV signal to the child handling SIGSEGV:
+ *	kill -11 <child1-tid>
+ * Watch the event being notified with exit code 11 to proc_filter
+ * Watch child 2 tid being notified with exit code 1 (value defined in code)
+ * to proc_filter
+ */
+
+extern int notify_netlink_thread_exit(unsigned int exit_code);
+
+static void sigsegvh(int sig)
+{
+	unsigned int exit_code = (unsigned int) sig;
+	/*
+	 * Send any non-zero value to get a notification. Here we are
+	 * sending the signal number for SIGSEGV which is 11
+	 */
+	notify_netlink_thread_exit(exit_code);
+}
+
+void *threadc1(void *ptr)
+{
+	signal(SIGSEGV, sigsegvh);
+	printf("Child 1 thread id %d, handling SIGSEGV\n", gettid());
+	sleep(20);
+	pthread_exit(NULL);
+}
+
+void *threadc2(void *ptr)
+{
+	int exit_val = 1;
+
+	printf("Child 2 thread id %d, wants to exit with value %d\n",
+			gettid(), exit_val);
+	sleep(2);
+	notify_netlink_thread_exit(exit_val);
+	pthread_exit(NULL);
+}
+
+int main(int argc, char **argv)
+{
+	pthread_t thread1, thread2;
+	pthread_attr_t attr1, attr2;
+	int ret;
+
+	ret = pthread_attr_init(&attr1);
+	if (ret != 0) {
+		perror("pthread_attr_init failed");
+		exit(ret);
+	}
+	ret = pthread_attr_setdetachstate(&attr1, PTHREAD_CREATE_DETACHED);
+	if (ret != 0) {
+		perror("pthread_attr_setdetachstate failed");
+		exit(ret);
+	}
+	ret = pthread_create(&thread1, &attr1, *threadc1, NULL);
+	if (ret != 0) {
+		perror("pthread_create failed");
+		exit(ret);
+	}
+
+	ret = pthread_attr_init(&attr2);
+	if (ret != 0) {
+		perror("pthread_attr_init failed");
+		exit(ret);
+	}
+	ret = pthread_attr_setdetachstate(&attr2, PTHREAD_CREATE_DETACHED);
+	if (ret != 0) {
+		perror("pthread_attr_setdetachstate failed");
+		exit(ret);
+	}
+	ret = pthread_create(&thread2, &attr2, *threadc2, NULL);
+	if (ret != 0) {
+		perror("pthread_create failed");
+		exit(ret);
+	}
+
+	/* Wait for children to exit or be killed */
+	sleep(30);
+	exit(0);
+}
diff --git a/tools/testing/selftests/connector/thread_filter.c b/tools/testing/selftests/connector/thread_filter.c
new file mode 100644
index 000000000000..3da740aa7537
--- /dev/null
+++ b/tools/testing/selftests/connector/thread_filter.c
@@ -0,0 +1,96 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Author: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
+ *
+ * Copyright (c) 2024 Oracle and/or its affiliates.
+ */
+
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <linux/connector.h>
+#include <linux/cn_proc.h>
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <strings.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+
+#define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
+			sizeof(struct proc_input))
+
+/*
+ * Send PROC_CN_MCAST_NOTIFY type notification to the connector code in kernel.
+ * This will send the exit_code specified by user to the connector layer, so
+ * it can send a notification for that event to any listening process
+ */
+int send_message(int nl_sock, unsigned int exit_code)
+{
+	char buff[NL_MESSAGE_SIZE];
+	struct nlmsghdr *hdr;
+	struct cn_msg *msg;
+
+	hdr = (struct nlmsghdr *)buff;
+	hdr->nlmsg_len = NL_MESSAGE_SIZE;
+	hdr->nlmsg_type = NLMSG_DONE;
+	hdr->nlmsg_flags = 0;
+	hdr->nlmsg_seq = 0;
+	hdr->nlmsg_pid = getpid();
+
+	msg = (struct cn_msg *)NLMSG_DATA(hdr);
+	msg->id.idx = CN_IDX_PROC;
+	msg->id.val = CN_VAL_PROC;
+	msg->seq = 0;
+	msg->ack = 0;
+	msg->flags = 0;
+
+	msg->len = sizeof(struct proc_input);
+	((struct proc_input *)msg->data)->mcast_op =
+		PROC_CN_MCAST_NOTIFY;
+	((struct proc_input *)msg->data)->uexit_code = exit_code;
+
+	if (send(nl_sock, hdr, hdr->nlmsg_len, 0) == -1) {
+		perror("send failed");
+		return -errno;
+	}
+	return 0;
+}
+
+int notify_netlink_thread_exit(unsigned int exit_code)
+{
+	struct sockaddr_nl sa_nl;
+	int err = 0;
+	int nl_sock;
+
+	nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+
+	if (nl_sock == -1) {
+		perror("socket failed");
+		return -errno;
+	}
+
+	bzero(&sa_nl, sizeof(sa_nl));
+	sa_nl.nl_family = AF_NETLINK;
+	sa_nl.nl_groups = CN_IDX_PROC;
+	sa_nl.nl_pid    = gettid();
+
+	if (bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1) {
+		perror("bind failed");
+		close(nl_sock);
+		return -errno;
+	}
+
+	err = send_message(nl_sock, exit_code);
+
+	close(nl_sock);
+
+	if (err < 0)
+		return err;
+
+	return 0;
+}