diff mbox series

[net-next,v3,1/3] selftests/ptr_ring: add benchmark application for ptr_ring

Message ID 1625142402-64945-2-git-send-email-linyunsheng@huawei.com (mailing list archive)
State New
Headers show
Series add benchmark selftest and optimization for ptr_ring | expand

Commit Message

Yunsheng Lin July 1, 2021, 12:26 p.m. UTC
Currently ptr_ring selftest is embedded within the virtio
selftest, which involves some specific virtio operation,
such as notifying and kicking.

As ptr_ring has been used by various subsystems, it deserves
it's owner selftest in order to benchmark different usecase
of ptr_ring, such as page pool and pfifo_fast qdisc.

So add a simple application to benchmark ptr_ring performance.
Currently two test mode is supported:
Mode 0: Both producing and consuming is done in a single thread,
        it is called simple test mode in the test app.
Mode 1: Producing and consuming is done in different thread
        concurrently, also known as SPSC(single-producer/
        single-consumer) test.

The multi-producer/single-consumer test for pfifo_fast case is
not added yet, which can be added if using CAS atomic operation
to enable lockless multi-producer is proved to be better than
using r->producer_lock.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
V3: Remove timestamp sampling, use standard C library as much
    as possible.
---
 MAINTAINERS                                      |   5 +
 tools/testing/selftests/ptr_ring/Makefile        |   6 +
 tools/testing/selftests/ptr_ring/ptr_ring_test.c | 224 +++++++++++++++++++++++
 tools/testing/selftests/ptr_ring/ptr_ring_test.h | 130 +++++++++++++
 4 files changed, 365 insertions(+)
 create mode 100644 tools/testing/selftests/ptr_ring/Makefile
 create mode 100644 tools/testing/selftests/ptr_ring/ptr_ring_test.c
 create mode 100644 tools/testing/selftests/ptr_ring/ptr_ring_test.h

Comments

Jason Wang July 2, 2021, 6:43 a.m. UTC | #1
在 2021/7/1 下午8:26, Yunsheng Lin 写道:
> Currently ptr_ring selftest is embedded within the virtio
> selftest, which involves some specific virtio operation,
> such as notifying and kicking.
>
> As ptr_ring has been used by various subsystems, it deserves
> it's owner selftest in order to benchmark different usecase
> of ptr_ring, such as page pool and pfifo_fast qdisc.
>
> So add a simple application to benchmark ptr_ring performance.
> Currently two test mode is supported:
> Mode 0: Both producing and consuming is done in a single thread,
>          it is called simple test mode in the test app.
> Mode 1: Producing and consuming is done in different thread
>          concurrently, also known as SPSC(single-producer/
>          single-consumer) test.
>
> The multi-producer/single-consumer test for pfifo_fast case is
> not added yet, which can be added if using CAS atomic operation
> to enable lockless multi-producer is proved to be better than
> using r->producer_lock.
>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
> V3: Remove timestamp sampling, use standard C library as much
>      as possible.
> ---
>   MAINTAINERS                                      |   5 +
>   tools/testing/selftests/ptr_ring/Makefile        |   6 +
>   tools/testing/selftests/ptr_ring/ptr_ring_test.c | 224 +++++++++++++++++++++++
>   tools/testing/selftests/ptr_ring/ptr_ring_test.h | 130 +++++++++++++
>   4 files changed, 365 insertions(+)
>   create mode 100644 tools/testing/selftests/ptr_ring/Makefile
>   create mode 100644 tools/testing/selftests/ptr_ring/ptr_ring_test.c
>   create mode 100644 tools/testing/selftests/ptr_ring/ptr_ring_test.h
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index cc375fd..1227022 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -14847,6 +14847,11 @@ F:	drivers/net/phy/dp83640*
>   F:	drivers/ptp/*
>   F:	include/linux/ptp_cl*
>   
> +PTR RING BENCHMARK
> +M:	Yunsheng Lin <linyunsheng@huawei.com>
> +L:	netdev@vger.kernel.org
> +F:	tools/testing/selftests/ptr_ring/
> +
>   PTRACE SUPPORT
>   M:	Oleg Nesterov <oleg@redhat.com>
>   S:	Maintained
> diff --git a/tools/testing/selftests/ptr_ring/Makefile b/tools/testing/selftests/ptr_ring/Makefile
> new file mode 100644
> index 0000000..346dea9
> --- /dev/null
> +++ b/tools/testing/selftests/ptr_ring/Makefile
> @@ -0,0 +1,6 @@
> +# SPDX-License-Identifier: GPL-2.0
> +LDLIBS = -lpthread
> +
> +TEST_GEN_PROGS := ptr_ring_test
> +
> +include ../lib.mk
> diff --git a/tools/testing/selftests/ptr_ring/ptr_ring_test.c b/tools/testing/selftests/ptr_ring/ptr_ring_test.c
> new file mode 100644
> index 0000000..4a5312f
> --- /dev/null
> +++ b/tools/testing/selftests/ptr_ring/ptr_ring_test.c
> @@ -0,0 +1,224 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2021 HiSilicon Limited.
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <malloc.h>
> +#include <stdbool.h>
> +
> +#include "ptr_ring_test.h"
> +#include "../../../../include/linux/ptr_ring.h"
> +
> +#define MIN_RING_SIZE	2
> +#define MAX_RING_SIZE	10000000
> +
> +static struct ptr_ring ring ____cacheline_aligned_in_smp;
> +
> +struct worker_info {
> +	pthread_t tid;
> +	int test_count;
> +	bool error;
> +};
> +
> +static void *produce_worker(void *arg)
> +{
> +	struct worker_info *info = arg;
> +	unsigned long i = 0;
> +	int ret;
> +
> +	while (++i <= info->test_count) {
> +		while (__ptr_ring_full(&ring))
> +			cpu_relax();
> +
> +		ret = __ptr_ring_produce(&ring, (void *)i);
> +		if (ret) {
> +			fprintf(stderr, "produce failed: %d\n", ret);
> +			info->error = true;
> +			return NULL;
> +		}
> +	}
> +
> +	info->error = false;
> +
> +	return NULL;
> +}
> +
> +static void *consume_worker(void *arg)
> +{
> +	struct worker_info *info = arg;
> +	unsigned long i = 0;
> +	int *ptr;
> +
> +	while (++i <= info->test_count) {
> +		while (__ptr_ring_empty(&ring))
> +			cpu_relax();


Any reason for not simply use __ptr_ring_consume() here?


> +
> +		ptr = __ptr_ring_consume(&ring);
> +		if ((unsigned long)ptr != i) {
> +			fprintf(stderr, "consumer failed, ptr: %lu, i: %lu\n",
> +				(unsigned long)ptr, i);
> +			info->error = true;
> +			return NULL;
> +		}
> +	}
> +
> +	if (!__ptr_ring_empty(&ring)) {
> +		fprintf(stderr, "ring should be empty, test failed\n");
> +		info->error = true;
> +		return NULL;
> +	}
> +
> +	info->error = false;
> +	return NULL;
> +}
> +
> +/* test case for single producer single consumer */
> +static void spsc_test(int size, int count)
> +{
> +	struct worker_info producer, consumer;
> +	pthread_attr_t attr;
> +	void *res;
> +	int ret;
> +
> +	ret = ptr_ring_init(&ring, size, 0);
> +	if (ret) {
> +		fprintf(stderr, "init failed: %d\n", ret);
> +		return;
> +	}
> +
> +	producer.test_count = count;
> +	consumer.test_count = count;
> +
> +	ret = pthread_attr_init(&attr);
> +	if (ret) {
> +		fprintf(stderr, "pthread attr init failed: %d\n", ret);
> +		goto out;
> +	}
> +
> +	ret = pthread_create(&producer.tid, &attr,
> +			     produce_worker, &producer);
> +	if (ret) {
> +		fprintf(stderr, "create producer thread failed: %d\n", ret);
> +		goto out;
> +	}
> +
> +	ret = pthread_create(&consumer.tid, &attr,
> +			     consume_worker, &consumer);
> +	if (ret) {
> +		fprintf(stderr, "create consumer thread failed: %d\n", ret);
> +		goto out;
> +	}
> +
> +	ret = pthread_join(producer.tid, &res);
> +	if (ret) {
> +		fprintf(stderr, "join producer thread failed: %d\n", ret);
> +		goto out;
> +	}
> +
> +	ret = pthread_join(consumer.tid, &res);
> +	if (ret) {
> +		fprintf(stderr, "join consumer thread failed: %d\n", ret);
> +		goto out;
> +	}
> +
> +	if (producer.error || consumer.error) {
> +		fprintf(stderr, "spsc test failed\n");
> +		goto out;
> +	}
> +
> +	printf("ptr_ring(size:%d) perf spsc test produced/comsumed %d items, finished\n",
> +	       size, count);
> +out:
> +	ptr_ring_cleanup(&ring, NULL);
> +}
> +
> +static void simple_test(int size, int count)
> +{
> +	struct timeval start, end;
> +	int i = 0;
> +	int *ptr;
> +	int ret;
> +
> +	ret = ptr_ring_init(&ring, size, 0);
> +	if (ret) {
> +		fprintf(stderr, "init failed: %d\n", ret);
> +		return;
> +	}
> +
> +	while (++i <= count) {
> +		ret = __ptr_ring_produce(&ring, &count);
> +		if (ret) {
> +			fprintf(stderr, "produce failed: %d\n", ret);
> +			goto out;
> +		}
> +
> +		ptr = __ptr_ring_consume(&ring);
> +		if (ptr != &count)  {
> +			fprintf(stderr, "consume failed: %p\n", ptr);
> +			goto out;
> +		}
> +	}
> +
> +	printf("ptr_ring(size:%d) perf simple test produced/consumed %d items, finished\n",
> +	       size, count);
> +
> +out:
> +	ptr_ring_cleanup(&ring, NULL);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	int count = 1000000;
> +	int size = 1000;
> +	int mode = 0;
> +	int opt;
> +
> +	while ((opt = getopt(argc, argv, "N:s:m:h")) != -1) {
> +		switch (opt) {
> +		case 'N':
> +			count = atoi(optarg);
> +			break;
> +		case 's':
> +			size = atoi(optarg);
> +			break;
> +		case 'm':
> +			mode = atoi(optarg);
> +			break;
> +		case 'h':
> +			printf("usage: ptr_ring_test [-N COUNT] [-s RING_SIZE] [-m TEST_MODE]\n");
> +			return 0;
> +		default:
> +			return -1;
> +		}
> +	}
> +
> +	if (count <= 0) {
> +		fprintf(stderr, "invalid test count, must be > 0\n");
> +		return -1;
> +	}
> +
> +	if (size < MIN_RING_SIZE || size > MAX_RING_SIZE) {
> +		fprintf(stderr, "invalid ring size, must be in %d-%d\n",
> +			MIN_RING_SIZE, MAX_RING_SIZE);
> +		return -1;
> +	}
> +
> +	switch (mode) {
> +	case 0:
> +		simple_test(size, count);
> +		break;
> +	case 1:
> +		spsc_test(size, count);
> +		break;
> +	default:
> +		fprintf(stderr, "invalid test mode\n");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> diff --git a/tools/testing/selftests/ptr_ring/ptr_ring_test.h b/tools/testing/selftests/ptr_ring/ptr_ring_test.h
> new file mode 100644
> index 0000000..32bfefb
> --- /dev/null
> +++ b/tools/testing/selftests/ptr_ring/ptr_ring_test.h


Let's reuse ptr_ring.c in tools/virtio/ringtest. Nothing virt specific 
there.

Thanks


> @@ -0,0 +1,130 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +#ifndef _TEST_PTR_RING_TEST_H
> +#define _TEST_PTR_RING_TEST_H
> +
> +#include <assert.h>
> +#include <stdatomic.h>
> +#include <pthread.h>
> +
> +/* Assuming the cache line size is 64 for most cpu,
> + * change it accordingly if the running cpu has different
> + * cache line size in order to get more accurate result.
> + */
> +#define SMP_CACHE_BYTES	64
> +
> +#define cpu_relax()	sched_yield()
> +#define smp_release()	atomic_thread_fence(memory_order_release)
> +#define smp_acquire()	atomic_thread_fence(memory_order_acquire)
> +#define smp_wmb()	smp_release()
> +#define smp_store_release(p, v)	\
> +		atomic_store_explicit(p, v, memory_order_release)
> +
> +#define READ_ONCE(x)		(*(volatile typeof(x) *)&(x))
> +#define WRITE_ONCE(x, val)	((*(volatile typeof(x) *)&(x)) = (val))
> +#define cache_line_size		SMP_CACHE_BYTES
> +#define unlikely(x)		(__builtin_expect(!!(x), 0))
> +#define likely(x)		(__builtin_expect(!!(x), 1))
> +#define ALIGN(x, a)		(((x) + (a) - 1) / (a) * (a))
> +#define SIZE_MAX		(~(size_t)0)
> +#define KMALLOC_MAX_SIZE	SIZE_MAX
> +#define spinlock_t		pthread_spinlock_t
> +#define gfp_t			int
> +#define __GFP_ZERO		0x1
> +
> +#define ____cacheline_aligned_in_smp \
> +		__attribute__((aligned(SMP_CACHE_BYTES)))
> +
> +static inline void *kmalloc(unsigned int size, gfp_t gfp)
> +{
> +	void *p;
> +
> +	p = memalign(64, size);
> +	if (!p)
> +		return p;
> +
> +	if (gfp & __GFP_ZERO)
> +		memset(p, 0, size);
> +
> +	return p;
> +}
> +
> +static inline void *kzalloc(unsigned int size, gfp_t flags)
> +{
> +	return kmalloc(size, flags | __GFP_ZERO);
> +}
> +
> +static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
> +{
> +	if (size != 0 && n > SIZE_MAX / size)
> +		return NULL;
> +	return kmalloc(n * size, flags);
> +}
> +
> +static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
> +{
> +	return kmalloc_array(n, size, flags | __GFP_ZERO);
> +}
> +
> +static inline void kfree(void *p)
> +{
> +	free(p);
> +}
> +
> +#define kvmalloc_array		kmalloc_array
> +#define kvfree			kfree
> +
> +static inline void spin_lock_init(spinlock_t *lock)
> +{
> +	int r = pthread_spin_init(lock, 0);
> +
> +	assert(!r);
> +}
> +
> +static inline void spin_lock(spinlock_t *lock)
> +{
> +	int ret = pthread_spin_lock(lock);
> +
> +	assert(!ret);
> +}
> +
> +static inline void spin_unlock(spinlock_t *lock)
> +{
> +	int ret = pthread_spin_unlock(lock);
> +
> +	assert(!ret);
> +}
> +
> +static inline void spin_lock_bh(spinlock_t *lock)
> +{
> +	spin_lock(lock);
> +}
> +
> +static inline void spin_unlock_bh(spinlock_t *lock)
> +{
> +	spin_unlock(lock);
> +}
> +
> +static inline void spin_lock_irq(spinlock_t *lock)
> +{
> +	spin_lock(lock);
> +}
> +
> +static inline void spin_unlock_irq(spinlock_t *lock)
> +{
> +	spin_unlock(lock);
> +}
> +
> +static inline void spin_lock_irqsave(spinlock_t *lock,
> +				     unsigned long f)
> +{
> +	spin_lock(lock);
> +}
> +
> +static inline void spin_unlock_irqrestore(spinlock_t *lock,
> +					  unsigned long f)
> +{
> +	spin_unlock(lock);
> +}
> +
> +#endif
Yunsheng Lin July 2, 2021, 8:17 a.m. UTC | #2
On 2021/7/2 14:43, Jason Wang wrote:
> 
> 在 2021/7/1 下午8:26, Yunsheng Lin 写道:
>> Currently ptr_ring selftest is embedded within the virtio
>> selftest, which involves some specific virtio operation,
>> such as notifying and kicking.
>>
>> As ptr_ring has been used by various subsystems, it deserves
>> it's owner selftest in order to benchmark different usecase
>> of ptr_ring, such as page pool and pfifo_fast qdisc.
>>
>> So add a simple application to benchmark ptr_ring performance.
>> Currently two test mode is supported:
>> Mode 0: Both producing and consuming is done in a single thread,
>>          it is called simple test mode in the test app.
>> Mode 1: Producing and consuming is done in different thread
>>          concurrently, also known as SPSC(single-producer/
>>          single-consumer) test.
>>
>> The multi-producer/single-consumer test for pfifo_fast case is
>> not added yet, which can be added if using CAS atomic operation
>> to enable lockless multi-producer is proved to be better than
>> using r->producer_lock.
>>
>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
>> ---
>> V3: Remove timestamp sampling, use standard C library as much
>>      as possible.

[...]

>> +static void *produce_worker(void *arg)
>> +{
>> +    struct worker_info *info = arg;
>> +    unsigned long i = 0;
>> +    int ret;
>> +
>> +    while (++i <= info->test_count) {
>> +        while (__ptr_ring_full(&ring))
>> +            cpu_relax();
>> +
>> +        ret = __ptr_ring_produce(&ring, (void *)i);
>> +        if (ret) {
>> +            fprintf(stderr, "produce failed: %d\n", ret);
>> +            info->error = true;
>> +            return NULL;
>> +        }
>> +    }
>> +
>> +    info->error = false;
>> +
>> +    return NULL;
>> +}
>> +
>> +static void *consume_worker(void *arg)
>> +{
>> +    struct worker_info *info = arg;
>> +    unsigned long i = 0;
>> +    int *ptr;
>> +
>> +    while (++i <= info->test_count) {
>> +        while (__ptr_ring_empty(&ring))
>> +            cpu_relax();
> 
> 
> Any reason for not simply use __ptr_ring_consume() here?

No particular reason, just to make sure the ring is
non-empty before doing the enqueuing, we could check
if the __ptr_ring_consume() return NULL to decide
the if the ring is empty. Using __ptr_ring_consume()
here enable testing the correctness and performance of
__ptr_ring_consume() too.

> 
> 
>> +
>> +        ptr = __ptr_ring_consume(&ring);
>> +        if ((unsigned long)ptr != i) {
>> +            fprintf(stderr, "consumer failed, ptr: %lu, i: %lu\n",
>> +                (unsigned long)ptr, i);
>> +            info->error = true;
>> +            return NULL;
>> +        }
>> +    }
>> +
>> +    if (!__ptr_ring_empty(&ring)) {
>> +        fprintf(stderr, "ring should be empty, test failed\n");
>> +        info->error = true;
>> +        return NULL;
>> +    }
>> +
>> +    info->error = false;
>> +    return NULL;
>> +}
>> +

[...]

>> +
>> +    return 0;
>> +}
>> diff --git a/tools/testing/selftests/ptr_ring/ptr_ring_test.h b/tools/testing/selftests/ptr_ring/ptr_ring_test.h
>> new file mode 100644
>> index 0000000..32bfefb
>> --- /dev/null
>> +++ b/tools/testing/selftests/ptr_ring/ptr_ring_test.h
> 
> 
> Let's reuse ptr_ring.c in tools/virtio/ringtest. Nothing virt specific there.

It *does* have some virtio specific at the end of ptr_ring.c.
It can be argued that the ptr_ring.c in tools/virtio/ringtest
could be refactored to remove the function related to virtio.

But as mentioned in the previous disscusion [1], the tools/virtio/
seems to have compile error in the latest kernel, it does not seems
right to reuse that. And most of testcase in tools/virtio/ seems
better be in tools/virtio/ringtest instead,so until the testcase
in tools/virtio/ is compile-error-free and moved to tools/testing/
selftests/, it seems better not to reuse it for now.

1. https://patchwork.kernel.org/project/netdevbpf/patch/1624591136-6647-2-git-send-email-linyunsheng@huawei.com/#24278945

> 
> Thanks
> 

[...]

> 
> .
>
Michael S. Tsirkin July 2, 2021, 8:30 a.m. UTC | #3
On Fri, Jul 02, 2021 at 04:17:17PM +0800, Yunsheng Lin wrote:
> > Let's reuse ptr_ring.c in tools/virtio/ringtest. Nothing virt specific there.
> 
> It *does* have some virtio specific at the end of ptr_ring.c.
> It can be argued that the ptr_ring.c in tools/virtio/ringtest
> could be refactored to remove the function related to virtio.
> 
> But as mentioned in the previous disscusion [1], the tools/virtio/
> seems to have compile error in the latest kernel, it does not seems
> right to reuse that.
> And most of testcase in tools/virtio/ seems
> better be in tools/virtio/ringtest instead,so until the testcase
> in tools/virtio/ is compile-error-free and moved to tools/testing/
> selftests/, it seems better not to reuse it for now.


That's a great reason to reuse - so tools/virtio/ stays working.
Please just fix that.
Yunsheng Lin July 2, 2021, 8:46 a.m. UTC | #4
On 2021/7/2 16:30, Michael S. Tsirkin wrote:
> On Fri, Jul 02, 2021 at 04:17:17PM +0800, Yunsheng Lin wrote:
>>> Let's reuse ptr_ring.c in tools/virtio/ringtest. Nothing virt specific there.
>>
>> It *does* have some virtio specific at the end of ptr_ring.c.
>> It can be argued that the ptr_ring.c in tools/virtio/ringtest
>> could be refactored to remove the function related to virtio.
>>
>> But as mentioned in the previous disscusion [1], the tools/virtio/
>> seems to have compile error in the latest kernel, it does not seems
>> right to reuse that.
>> And most of testcase in tools/virtio/ seems
>> better be in tools/virtio/ringtest instead,so until the testcase
>> in tools/virtio/ is compile-error-free and moved to tools/testing/
>> selftests/, it seems better not to reuse it for now.
> 
> 
> That's a great reason to reuse - so tools/virtio/ stays working.
> Please just fix that.

I understand that you guys like to see a working testcase of virtio.
I would love to do that if I have the time and knowledge of virtio,
But I do not think I have the time and I am familiar enough with
virtio to fix that now.


>
Jason Wang July 2, 2021, 9:04 a.m. UTC | #5
在 2021/7/2 下午4:46, Yunsheng Lin 写道:
> On 2021/7/2 16:30, Michael S. Tsirkin wrote:
>> On Fri, Jul 02, 2021 at 04:17:17PM +0800, Yunsheng Lin wrote:
>>>> Let's reuse ptr_ring.c in tools/virtio/ringtest. Nothing virt specific there.
>>> It *does* have some virtio specific at the end of ptr_ring.c.


They are just wrappers to make ptr ring works like a virtio ring. We can 
split them out into another file if necessary.


>>> It can be argued that the ptr_ring.c in tools/virtio/ringtest
>>> could be refactored to remove the function related to virtio.
>>>
>>> But as mentioned in the previous disscusion [1], the tools/virtio/
>>> seems to have compile error in the latest kernel, it does not seems
>>> right to reuse that.
>>> And most of testcase in tools/virtio/ seems
>>> better be in tools/virtio/ringtest instead,so until the testcase
>>> in tools/virtio/ is compile-error-free and moved to tools/testing/
>>> selftests/, it seems better not to reuse it for now.
>>
>> That's a great reason to reuse - so tools/virtio/ stays working.
>> Please just fix that.


+1


> I understand that you guys like to see a working testcase of virtio.
> I would love to do that if I have the time and knowledge of virtio,
> But I do not think I have the time and I am familiar enough with
> virtio to fix that now.


So ringtest is used for bench-marking the ring performance for different 
format. Virtio is only one of the supported ring format, ptr ring is 
another. Wrappers were used to reuse the same test logic.

Though you may see host/guest in the test, it's in fact done via two 
processes.

We need figure out:

1) why the current ringtest.c does not fit for your requirement (it has 
SPSC test)
2) why can't we tweak the ptr_ring.c to be used by both ring_test and 
your benchmark

If neither of the above work, we can invent new ptr_ring infrastructure 
under tests/

Thanks


>
>
Yunsheng Lin July 2, 2021, 9:54 a.m. UTC | #6
On 2021/7/2 17:04, Jason Wang wrote:
> 

[...]

> 
> 
>> I understand that you guys like to see a working testcase of virtio.
>> I would love to do that if I have the time and knowledge of virtio,
>> But I do not think I have the time and I am familiar enough with
>> virtio to fix that now.
> 
> 
> So ringtest is used for bench-marking the ring performance for different format. Virtio is only one of the supported ring format, ptr ring is another. Wrappers were used to reuse the same test logic.
> 
> Though you may see host/guest in the test, it's in fact done via two processes.
> 
> We need figure out:
> 
> 1) why the current ringtest.c does not fit for your requirement (it has SPSC test)

There is MPSC case used by pfifo_fast, it make more sense to use a separate selftest
for ptr_ring as ptr_ring has been used by various subsystems.


> 2) why can't we tweak the ptr_ring.c to be used by both ring_test and your benchmark

Actually that is what I do in this patch, move the specific part related to ptr_ring
to ptr_ring_test.h. When the virtio testing is refactored to work, it can reuse the
abstract layer in ptr_ring_test.h too.

> 
> If neither of the above work, we can invent new ptr_ring infrastructure under tests/
> 
> Thanks
> 
> 
>>
>>
> 
> .
>
Michael S. Tsirkin July 2, 2021, 2:16 p.m. UTC | #7
On Fri, Jul 02, 2021 at 05:04:44PM +0800, Jason Wang wrote:
> 
> 在 2021/7/2 下午4:46, Yunsheng Lin 写道:
> > On 2021/7/2 16:30, Michael S. Tsirkin wrote:
> > > On Fri, Jul 02, 2021 at 04:17:17PM +0800, Yunsheng Lin wrote:
> > > > > Let's reuse ptr_ring.c in tools/virtio/ringtest. Nothing virt specific there.
> > > > It *does* have some virtio specific at the end of ptr_ring.c.
> 
> 
> They are just wrappers to make ptr ring works like a virtio ring. We can
> split them out into another file if necessary.
> 
> 
> > > > It can be argued that the ptr_ring.c in tools/virtio/ringtest
> > > > could be refactored to remove the function related to virtio.
> > > > 
> > > > But as mentioned in the previous disscusion [1], the tools/virtio/
> > > > seems to have compile error in the latest kernel, it does not seems
> > > > right to reuse that.
> > > > And most of testcase in tools/virtio/ seems
> > > > better be in tools/virtio/ringtest instead,so until the testcase
> > > > in tools/virtio/ is compile-error-free and moved to tools/testing/
> > > > selftests/, it seems better not to reuse it for now.
> > > 
> > > That's a great reason to reuse - so tools/virtio/ stays working.
> > > Please just fix that.
> 
> 
> +1
> 
> 
> > I understand that you guys like to see a working testcase of virtio.
> > I would love to do that if I have the time and knowledge of virtio,
> > But I do not think I have the time and I am familiar enough with
> > virtio to fix that now.
> 
> 
> So ringtest is used for bench-marking the ring performance for different
> format. Virtio is only one of the supported ring format, ptr ring is
> another. Wrappers were used to reuse the same test logic.
> 
> Though you may see host/guest in the test, it's in fact done via two
> processes.
> 
> We need figure out:
> 
> 1) why the current ringtest.c does not fit for your requirement (it has SPSC
> test)
> 2) why can't we tweak the ptr_ring.c to be used by both ring_test and your
> benchmark
> 
> If neither of the above work, we can invent new ptr_ring infrastructure
> under tests/
> 
> Thanks

For me 1) is not a question.

All the available/used terminology is not an ideal fit for ptr ring.
With virtio buffers are always owned by driver (producer) so producer
has a way to find out if a buffer has been consumed.  With ptr ring
there's no way for producer to know a buffer has been consumed.
The test hacks around that but it is very reasonable
not to want to rely on that.

However 2) is very much a question. We can split ptr_ring
to the preamble and virtio related hacks.
So all the portability infrastructure for building
kernel code from userspace, command line parsing,
run-on-all.sh to figure out affinity effects,
all that can and should IMHO be reused and not copy-pasted.

> 
> > 
> >
Michael S. Tsirkin July 2, 2021, 2:18 p.m. UTC | #8
On Fri, Jul 02, 2021 at 05:54:42PM +0800, Yunsheng Lin wrote:
> On 2021/7/2 17:04, Jason Wang wrote:
> > 
> 
> [...]
> 
> > 
> > 
> >> I understand that you guys like to see a working testcase of virtio.
> >> I would love to do that if I have the time and knowledge of virtio,
> >> But I do not think I have the time and I am familiar enough with
> >> virtio to fix that now.
> > 
> > 
> > So ringtest is used for bench-marking the ring performance for different format. Virtio is only one of the supported ring format, ptr ring is another. Wrappers were used to reuse the same test logic.
> > 
> > Though you may see host/guest in the test, it's in fact done via two processes.
> > 
> > We need figure out:
> > 
> > 1) why the current ringtest.c does not fit for your requirement (it has SPSC test)
> 
> There is MPSC case used by pfifo_fast, it make more sense to use a separate selftest
> for ptr_ring as ptr_ring has been used by various subsystems.
> 
> 
> > 2) why can't we tweak the ptr_ring.c to be used by both ring_test and your benchmark
> 
> Actually that is what I do in this patch, move the specific part related to ptr_ring
> to ptr_ring_test.h. When the virtio testing is refactored to work, it can reuse the
> abstract layer in ptr_ring_test.h too.

Sounds good. But that refactoring will be up to you as a contributor.

> > 
> > If neither of the above work, we can invent new ptr_ring infrastructure under tests/
> > 
> > Thanks
> > 
> > 
> >>
> >>
> > 
> > .
> >
Yunsheng Lin July 5, 2021, 1:43 a.m. UTC | #9
On 2021/7/2 22:18, Michael S. Tsirkin wrote:
> On Fri, Jul 02, 2021 at 05:54:42PM +0800, Yunsheng Lin wrote:
>> On 2021/7/2 17:04, Jason Wang wrote:
>>>
>>
>> [...]
>>
>>>
>>>
>>>> I understand that you guys like to see a working testcase of virtio.
>>>> I would love to do that if I have the time and knowledge of virtio,
>>>> But I do not think I have the time and I am familiar enough with
>>>> virtio to fix that now.
>>>
>>>
>>> So ringtest is used for bench-marking the ring performance for different format. Virtio is only one of the supported ring format, ptr ring is another. Wrappers were used to reuse the same test logic.
>>>
>>> Though you may see host/guest in the test, it's in fact done via two processes.
>>>
>>> We need figure out:
>>>
>>> 1) why the current ringtest.c does not fit for your requirement (it has SPSC test)
>>
>> There is MPSC case used by pfifo_fast, it make more sense to use a separate selftest
>> for ptr_ring as ptr_ring has been used by various subsystems.
>>
>>
>>> 2) why can't we tweak the ptr_ring.c to be used by both ring_test and your benchmark
>>
>> Actually that is what I do in this patch, move the specific part related to ptr_ring
>> to ptr_ring_test.h. When the virtio testing is refactored to work, it can reuse the
>> abstract layer in ptr_ring_test.h too.
> 
> Sounds good. But that refactoring will be up to you as a contributor.

It seems that tools/include/* have a lot of portability infrastructure for building
kernel code from userspace, will try to refactor the ptr_ring.h to use the portability
infrastructure in tools/include/* when building ptr_ring.h from userspace.

>
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index cc375fd..1227022 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14847,6 +14847,11 @@  F:	drivers/net/phy/dp83640*
 F:	drivers/ptp/*
 F:	include/linux/ptp_cl*
 
+PTR RING BENCHMARK
+M:	Yunsheng Lin <linyunsheng@huawei.com>
+L:	netdev@vger.kernel.org
+F:	tools/testing/selftests/ptr_ring/
+
 PTRACE SUPPORT
 M:	Oleg Nesterov <oleg@redhat.com>
 S:	Maintained
diff --git a/tools/testing/selftests/ptr_ring/Makefile b/tools/testing/selftests/ptr_ring/Makefile
new file mode 100644
index 0000000..346dea9
--- /dev/null
+++ b/tools/testing/selftests/ptr_ring/Makefile
@@ -0,0 +1,6 @@ 
+# SPDX-License-Identifier: GPL-2.0
+LDLIBS = -lpthread
+
+TEST_GEN_PROGS := ptr_ring_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/ptr_ring/ptr_ring_test.c b/tools/testing/selftests/ptr_ring/ptr_ring_test.c
new file mode 100644
index 0000000..4a5312f
--- /dev/null
+++ b/tools/testing/selftests/ptr_ring/ptr_ring_test.c
@@ -0,0 +1,224 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 HiSilicon Limited.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <malloc.h>
+#include <stdbool.h>
+
+#include "ptr_ring_test.h"
+#include "../../../../include/linux/ptr_ring.h"
+
+#define MIN_RING_SIZE	2
+#define MAX_RING_SIZE	10000000
+
+static struct ptr_ring ring ____cacheline_aligned_in_smp;
+
+struct worker_info {
+	pthread_t tid;
+	int test_count;
+	bool error;
+};
+
+static void *produce_worker(void *arg)
+{
+	struct worker_info *info = arg;
+	unsigned long i = 0;
+	int ret;
+
+	while (++i <= info->test_count) {
+		while (__ptr_ring_full(&ring))
+			cpu_relax();
+
+		ret = __ptr_ring_produce(&ring, (void *)i);
+		if (ret) {
+			fprintf(stderr, "produce failed: %d\n", ret);
+			info->error = true;
+			return NULL;
+		}
+	}
+
+	info->error = false;
+
+	return NULL;
+}
+
+static void *consume_worker(void *arg)
+{
+	struct worker_info *info = arg;
+	unsigned long i = 0;
+	int *ptr;
+
+	while (++i <= info->test_count) {
+		while (__ptr_ring_empty(&ring))
+			cpu_relax();
+
+		ptr = __ptr_ring_consume(&ring);
+		if ((unsigned long)ptr != i) {
+			fprintf(stderr, "consumer failed, ptr: %lu, i: %lu\n",
+				(unsigned long)ptr, i);
+			info->error = true;
+			return NULL;
+		}
+	}
+
+	if (!__ptr_ring_empty(&ring)) {
+		fprintf(stderr, "ring should be empty, test failed\n");
+		info->error = true;
+		return NULL;
+	}
+
+	info->error = false;
+	return NULL;
+}
+
+/* test case for single producer single consumer */
+static void spsc_test(int size, int count)
+{
+	struct worker_info producer, consumer;
+	pthread_attr_t attr;
+	void *res;
+	int ret;
+
+	ret = ptr_ring_init(&ring, size, 0);
+	if (ret) {
+		fprintf(stderr, "init failed: %d\n", ret);
+		return;
+	}
+
+	producer.test_count = count;
+	consumer.test_count = count;
+
+	ret = pthread_attr_init(&attr);
+	if (ret) {
+		fprintf(stderr, "pthread attr init failed: %d\n", ret);
+		goto out;
+	}
+
+	ret = pthread_create(&producer.tid, &attr,
+			     produce_worker, &producer);
+	if (ret) {
+		fprintf(stderr, "create producer thread failed: %d\n", ret);
+		goto out;
+	}
+
+	ret = pthread_create(&consumer.tid, &attr,
+			     consume_worker, &consumer);
+	if (ret) {
+		fprintf(stderr, "create consumer thread failed: %d\n", ret);
+		goto out;
+	}
+
+	ret = pthread_join(producer.tid, &res);
+	if (ret) {
+		fprintf(stderr, "join producer thread failed: %d\n", ret);
+		goto out;
+	}
+
+	ret = pthread_join(consumer.tid, &res);
+	if (ret) {
+		fprintf(stderr, "join consumer thread failed: %d\n", ret);
+		goto out;
+	}
+
+	if (producer.error || consumer.error) {
+		fprintf(stderr, "spsc test failed\n");
+		goto out;
+	}
+
+	printf("ptr_ring(size:%d) perf spsc test produced/comsumed %d items, finished\n",
+	       size, count);
+out:
+	ptr_ring_cleanup(&ring, NULL);
+}
+
+static void simple_test(int size, int count)
+{
+	struct timeval start, end;
+	int i = 0;
+	int *ptr;
+	int ret;
+
+	ret = ptr_ring_init(&ring, size, 0);
+	if (ret) {
+		fprintf(stderr, "init failed: %d\n", ret);
+		return;
+	}
+
+	while (++i <= count) {
+		ret = __ptr_ring_produce(&ring, &count);
+		if (ret) {
+			fprintf(stderr, "produce failed: %d\n", ret);
+			goto out;
+		}
+
+		ptr = __ptr_ring_consume(&ring);
+		if (ptr != &count)  {
+			fprintf(stderr, "consume failed: %p\n", ptr);
+			goto out;
+		}
+	}
+
+	printf("ptr_ring(size:%d) perf simple test produced/consumed %d items, finished\n",
+	       size, count);
+
+out:
+	ptr_ring_cleanup(&ring, NULL);
+}
+
+int main(int argc, char *argv[])
+{
+	int count = 1000000;
+	int size = 1000;
+	int mode = 0;
+	int opt;
+
+	while ((opt = getopt(argc, argv, "N:s:m:h")) != -1) {
+		switch (opt) {
+		case 'N':
+			count = atoi(optarg);
+			break;
+		case 's':
+			size = atoi(optarg);
+			break;
+		case 'm':
+			mode = atoi(optarg);
+			break;
+		case 'h':
+			printf("usage: ptr_ring_test [-N COUNT] [-s RING_SIZE] [-m TEST_MODE]\n");
+			return 0;
+		default:
+			return -1;
+		}
+	}
+
+	if (count <= 0) {
+		fprintf(stderr, "invalid test count, must be > 0\n");
+		return -1;
+	}
+
+	if (size < MIN_RING_SIZE || size > MAX_RING_SIZE) {
+		fprintf(stderr, "invalid ring size, must be in %d-%d\n",
+			MIN_RING_SIZE, MAX_RING_SIZE);
+		return -1;
+	}
+
+	switch (mode) {
+	case 0:
+		simple_test(size, count);
+		break;
+	case 1:
+		spsc_test(size, count);
+		break;
+	default:
+		fprintf(stderr, "invalid test mode\n");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/ptr_ring/ptr_ring_test.h b/tools/testing/selftests/ptr_ring/ptr_ring_test.h
new file mode 100644
index 0000000..32bfefb
--- /dev/null
+++ b/tools/testing/selftests/ptr_ring/ptr_ring_test.h
@@ -0,0 +1,130 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _TEST_PTR_RING_TEST_H
+#define _TEST_PTR_RING_TEST_H
+
+#include <assert.h>
+#include <stdatomic.h>
+#include <pthread.h>
+
+/* Assuming the cache line size is 64 for most cpu,
+ * change it accordingly if the running cpu has different
+ * cache line size in order to get more accurate result.
+ */
+#define SMP_CACHE_BYTES	64
+
+#define cpu_relax()	sched_yield()
+#define smp_release()	atomic_thread_fence(memory_order_release)
+#define smp_acquire()	atomic_thread_fence(memory_order_acquire)
+#define smp_wmb()	smp_release()
+#define smp_store_release(p, v)	\
+		atomic_store_explicit(p, v, memory_order_release)
+
+#define READ_ONCE(x)		(*(volatile typeof(x) *)&(x))
+#define WRITE_ONCE(x, val)	((*(volatile typeof(x) *)&(x)) = (val))
+#define cache_line_size		SMP_CACHE_BYTES
+#define unlikely(x)		(__builtin_expect(!!(x), 0))
+#define likely(x)		(__builtin_expect(!!(x), 1))
+#define ALIGN(x, a)		(((x) + (a) - 1) / (a) * (a))
+#define SIZE_MAX		(~(size_t)0)
+#define KMALLOC_MAX_SIZE	SIZE_MAX
+#define spinlock_t		pthread_spinlock_t
+#define gfp_t			int
+#define __GFP_ZERO		0x1
+
+#define ____cacheline_aligned_in_smp \
+		__attribute__((aligned(SMP_CACHE_BYTES)))
+
+static inline void *kmalloc(unsigned int size, gfp_t gfp)
+{
+	void *p;
+
+	p = memalign(64, size);
+	if (!p)
+		return p;
+
+	if (gfp & __GFP_ZERO)
+		memset(p, 0, size);
+
+	return p;
+}
+
+static inline void *kzalloc(unsigned int size, gfp_t flags)
+{
+	return kmalloc(size, flags | __GFP_ZERO);
+}
+
+static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
+{
+	if (size != 0 && n > SIZE_MAX / size)
+		return NULL;
+	return kmalloc(n * size, flags);
+}
+
+static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
+{
+	return kmalloc_array(n, size, flags | __GFP_ZERO);
+}
+
+static inline void kfree(void *p)
+{
+	free(p);
+}
+
+#define kvmalloc_array		kmalloc_array
+#define kvfree			kfree
+
+static inline void spin_lock_init(spinlock_t *lock)
+{
+	int r = pthread_spin_init(lock, 0);
+
+	assert(!r);
+}
+
+static inline void spin_lock(spinlock_t *lock)
+{
+	int ret = pthread_spin_lock(lock);
+
+	assert(!ret);
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+	int ret = pthread_spin_unlock(lock);
+
+	assert(!ret);
+}
+
+static inline void spin_lock_bh(spinlock_t *lock)
+{
+	spin_lock(lock);
+}
+
+static inline void spin_unlock_bh(spinlock_t *lock)
+{
+	spin_unlock(lock);
+}
+
+static inline void spin_lock_irq(spinlock_t *lock)
+{
+	spin_lock(lock);
+}
+
+static inline void spin_unlock_irq(spinlock_t *lock)
+{
+	spin_unlock(lock);
+}
+
+static inline void spin_lock_irqsave(spinlock_t *lock,
+				     unsigned long f)
+{
+	spin_lock(lock);
+}
+
+static inline void spin_unlock_irqrestore(spinlock_t *lock,
+					  unsigned long f)
+{
+	spin_unlock(lock);
+}
+
+#endif