diff mbox

ARM: DMA-mapping: mark all !DMA_TO_DEVICE pages in unmapping as clean

Message ID 1367572365-18905-1-git-send-email-ming.lei@canonical.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ming Lei May 3, 2013, 9:12 a.m. UTC
It is common for one sg to include many pages, so mark all these
pages as clean to avoid unnecessary flushing on them in
set_pte_at() or update_mmu_cache().

The patch might improve loading performance of applciation code a bit.

On the below test code to read file(~1GByte size) from usb mass storage
disk to buffer created with mmap(PROT_READ | PROT_EXEC) on
Pandaboard, average ~1% improvement can be observed with the patch on
10 times test.


#define _GNU_SOURCE

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <string.h>
#include <assert.h>

unsigned int sum = 0;

static unsigned long tv_diff(struct timeval *tv1, struct timeval *tv2)
{
	return (tv2->tv_sec - tv1->tv_sec) * 1000000 +
		(tv2->tv_usec - tv1->tv_usec);
}

int main(int argc, char *argv[])
{
	char *mbuffer;
	int fd;
	int i;
	unsigned long page_size, size;
	struct stat stat;
	struct timeval t1, t2;

	page_size = getpagesize();
	fd = open(argv[1], O_RDONLY);
	assert(fd >= 0);

	fstat(fd, &stat);
	size = stat.st_size;
	printf("%s: file %s, file size %lu, page size %lu\n", argv[0],
		read_filename, size, page_size);

	gettimeofday(&t1, NULL);
	mbuffer = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
	for (i = 0 ; i < size ; i += page_size)
		sum += mbuffer[i];
	munmap(mbuffer, page_size);
	gettimeofday(&t2, NULL);
	printf("\tread mmaped time: %luus\n", tv_diff(&t1, &t2));

	close(fd);
}

Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 arch/arm/mm/dma-mapping.c |   19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

Comments

Ming Lei May 13, 2013, 1:56 a.m. UTC | #1
Hello,

On Fri, May 3, 2013 at 5:12 PM, Ming Lei <ming.lei@canonical.com> wrote:
> It is common for one sg to include many pages, so mark all these
> pages as clean to avoid unnecessary flushing on them in
> set_pte_at() or update_mmu_cache().
>
> The patch might improve loading performance of applciation code a bit.
>
> On the below test code to read file(~1GByte size) from usb mass storage
> disk to buffer created with mmap(PROT_READ | PROT_EXEC) on
> Pandaboard, average ~1% improvement can be observed with the patch on
> 10 times test.
>
>
> #define _GNU_SOURCE
>
> #include <fcntl.h>
> #include <stdio.h>
> #include <unistd.h>
> #include <stdlib.h>
> #include <sys/mman.h>
> #include <sys/stat.h>
> #include <string.h>
> #include <assert.h>
>
> unsigned int sum = 0;
>
> static unsigned long tv_diff(struct timeval *tv1, struct timeval *tv2)
> {
>         return (tv2->tv_sec - tv1->tv_sec) * 1000000 +
>                 (tv2->tv_usec - tv1->tv_usec);
> }
>
> int main(int argc, char *argv[])
> {
>         char *mbuffer;
>         int fd;
>         int i;
>         unsigned long page_size, size;
>         struct stat stat;
>         struct timeval t1, t2;
>
>         page_size = getpagesize();
>         fd = open(argv[1], O_RDONLY);
>         assert(fd >= 0);
>
>         fstat(fd, &stat);
>         size = stat.st_size;
>         printf("%s: file %s, file size %lu, page size %lu\n", argv[0],
>                 read_filename, size, page_size);
>
>         gettimeofday(&t1, NULL);
>         mbuffer = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
>         for (i = 0 ; i < size ; i += page_size)
>                 sum += mbuffer[i];
>         munmap(mbuffer, page_size);
>         gettimeofday(&t2, NULL);
>         printf("\tread mmaped time: %luus\n", tv_diff(&t1, &t2));
>
>         close(fd);
> }
>
> Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Russell King <linux@arm.linux.org.uk>
> Signed-off-by: Ming Lei <ming.lei@canonical.com>

Gentle ping, :-)

Thanks,
--
Ming Lei
Nicolas Pitre May 14, 2013, 7:28 p.m. UTC | #2
On Fri, 3 May 2013, Ming Lei wrote:

> It is common for one sg to include many pages, so mark all these
> pages as clean to avoid unnecessary flushing on them in
> set_pte_at() or update_mmu_cache().

[...]

> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index e9db6b4..e63124a 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -879,10 +879,23 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
>  	dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
>  
>  	/*
> -	 * Mark the D-cache clean for this page to avoid extra flushing.
> +	 * Mark the D-cache clean for these pages to avoid extra flushing.
>  	 */
> -	if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
> -		set_bit(PG_dcache_clean, &page->flags);
> +	if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
> +		unsigned long pfn;
> +		size_t left = size;
> +
> +		pfn = page_to_pfn(page);

Could the offset ever be greater than a page?  The code in 
dma_cache_maint_page() appears to be written with that possibility in 
mind.  So you should have:

	pfn = page_to_pfn(page) + off / PAGE_SIZE;

> +		if (off) {
> +			pfn++;
> +			left -= PAGE_SIZE - off % PAGE_SIZE;
> +		}
> +		while (left >= PAGE_SIZE) {
> +			page = pfn_to_page(pfn++);
> +			set_bit(PG_dcache_clean, &page->flags);
> +			left -= PAGE_SIZE;
> +		}
> +	}
>  }

Otherwise this looks fine to me.


Nicolas
Ming Lei May 15, 2013, 2:59 a.m. UTC | #3
Hi,

On Wed, May 15, 2013 at 3:28 AM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Fri, 3 May 2013, Ming Lei wrote:
>
>> It is common for one sg to include many pages, so mark all these
>> pages as clean to avoid unnecessary flushing on them in
>> set_pte_at() or update_mmu_cache().
>
> [...]
>
>> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
>> index e9db6b4..e63124a 100644
>> --- a/arch/arm/mm/dma-mapping.c
>> +++ b/arch/arm/mm/dma-mapping.c
>> @@ -879,10 +879,23 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
>>       dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
>>
>>       /*
>> -      * Mark the D-cache clean for this page to avoid extra flushing.
>> +      * Mark the D-cache clean for these pages to avoid extra flushing.
>>        */
>> -     if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
>> -             set_bit(PG_dcache_clean, &page->flags);
>> +     if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
>> +             unsigned long pfn;
>> +             size_t left = size;
>> +
>> +             pfn = page_to_pfn(page);
>
> Could the offset ever be greater than a page?  The code in
> dma_cache_maint_page() appears to be written with that possibility in
> mind.  So you should have:
>
>         pfn = page_to_pfn(page) + off / PAGE_SIZE;

Good catch, you are right.

>
>> +             if (off) {
>> +                     pfn++;
>> +                     left -= PAGE_SIZE - off % PAGE_SIZE;
>> +             }
>> +             while (left >= PAGE_SIZE) {
>> +                     page = pfn_to_page(pfn++);
>> +                     set_bit(PG_dcache_clean, &page->flags);
>> +                     left -= PAGE_SIZE;
>> +             }
>> +     }
>>  }
>
> Otherwise this looks fine to me.

I will send v1 with your ack, thanks for your review.

Thanks,
--
Ming Lei
diff mbox

Patch

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index e9db6b4..e63124a 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -879,10 +879,23 @@  static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
 	dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
 
 	/*
-	 * Mark the D-cache clean for this page to avoid extra flushing.
+	 * Mark the D-cache clean for these pages to avoid extra flushing.
 	 */
-	if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
-		set_bit(PG_dcache_clean, &page->flags);
+	if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
+		unsigned long pfn;
+		size_t left = size;
+
+		pfn = page_to_pfn(page);
+		if (off) {
+			pfn++;
+			left -= PAGE_SIZE - off % PAGE_SIZE;
+		}
+		while (left >= PAGE_SIZE) {
+			page = pfn_to_page(pfn++);
+			set_bit(PG_dcache_clean, &page->flags);
+			left -= PAGE_SIZE;
+		}
+	}
 }
 
 /**