diff mbox

IPC SHM alignment on ARMv7

Message ID 510A7262.4060307@parallels.com (mailing list archive)
State New, archived
Headers show

Commit Message

Alexander Kartashov Jan. 31, 2013, 1:32 p.m. UTC
Dear colleagues,

It turned out that IPC SHM works in a bit strange way on ARMv7:
the syscall sys_shmat() requires the argument shmaddr to be SHMLBA-aligned:

[ipc/shm.c]
[...]
  979     else if ((addr = (ulong)shmaddr)) {
  980         if (addr & (shmlba - 1)) {
  981             if (shmflg & SHM_RND)
  982                 addr &= ~(shmlba - 1);     /* round down */
  983             else
  984 #ifndef __ARCH_FORCE_SHMLBA
  985                 if (addr & ~PAGE_MASK)
  986 #endif
  987                     goto out;
  988         }
  989         flags = MAP_SHARED | MAP_FIXED;
[...]

since the macro __ARCH_FORCE_SHMLBA is unconditionally defined for the ARM
architecture. However it uses the function arch_get_unmapped_area()
introduced in the commit 4197692eef113eeb8e3e413cc70993a5e667e5b8
in the mainstream kernel to allocate memory for a SHM segment.
However the function allocates SHMLBA-aligned memory only if
I or D caches alias as the following comment reads:

[arch/arm/mm/mmap.c]
[...]
  54 unsigned long
  55 arch_get_unmapped_area(struct file *filp, unsigned long addr,
  56         unsigned long len, unsigned long pgoff, unsigned long flags)
  57 {
  58     struct mm_struct *mm = current->mm;
  59     struct vm_area_struct *vma;
  60     int do_align = 0;
  61     int aliasing = cache_is_vipt_aliasing();
  62     struct vm_unmapped_area_info info;
  63
  64     /*
  65      * We only need to do colour alignment if either the I or D
  66      * caches alias.
  67      */
  68     if (aliasing)
  69         do_align = filp || (flags & MAP_SHARED);
[...]

So a SHM segment isn't always SHMLBA-aligned.


This results in the following inconvenience: the address returned
by the syscall sys_shmat() may not be passed as its argument
later. This is however crucial for implementing IPC SHM
checkpoint/restore for the ARM architecture I'm currently working on.


As far as I can see from the commit c0e9587841a0fd79bbf8296034faefb9afe72fb4
in the mainstream kernel:

the flag CACHEID_VIPT_ALIASING is never set for ARMv7
so it's impossible to guarantee that an IPC SHM segment
is always SHMLBA-aligned.


Is it true that the desired SHM alignment is impossible
to be achieved on the ARMv7 architecture?

Comments

Russell King - ARM Linux Jan. 31, 2013, 1:47 p.m. UTC | #1
On Thu, Jan 31, 2013 at 05:32:18PM +0400, Alexander Kartashov wrote:
> Dear colleagues,
>
> It tuned out that IPC SHM works in a bit strange way on ARMv7:
> the syscall sys_shmat() requires the argument shmaddr to be SHMLBA-aligned:
>
> [ipc/shm.c]
> [...]
>  979     else if ((addr = (ulong)shmaddr)) {
>  980         if (addr & (shmlba - 1)) {
>  981             if (shmflg & SHM_RND)
>  982                 addr &= ~(shmlba - 1);     /* round down */
>  983             else
>  984 #ifndef __ARCH_FORCE_SHMLBA
>  985                 if (addr & ~PAGE_MASK)
>  986 #endif
>  987                     goto out;
>  988         }
>  989         flags = MAP_SHARED | MAP_FIXED;
> [...]
>
> since macro __ARCH_FORCE_SHMLBA is unconditionally defined for the ARM
> architecture. However it uses the function arch_get_unmapped_area()
> introduced in the commit 4197692eef113eeb8e3e413cc70993a5e667e5b8
> in the mainstream kernel to allocate memory for a SHM segment.

Err, no.  Try again - this is the mainline kernel:

$ git log 4197692eef113eeb8e3e413cc70993a5e667e5b8
fatal: bad object 4197692eef113eeb8e3e413cc70993a5e667e5b8

Would this be in linux-next?  Maybe it's a patch which someone else has
applied which isn't in Linus' kernel?

With just the commit ID and no description (we're *constantly* harping
on about that on this list, especially in commit comments: ALWAYS provide
the description as well) it's much harder to do anything with your report.

Please try again.

Thanks.
Alexander Kartashov Jan. 31, 2013, 2:08 p.m. UTC | #2
On 01/31/2013 05:47 PM, Russell King - ARM Linux wrote:
> Err, no.  Try again - this is the mainline kernel:

I'm sorry, I forgot to note that it's very old code,
it's from the tglx/history.git that you may find here:
http://git.kernel.org/?p=linux/kernel/git/tglx/history.git;a=commit;h=4197692eef113eeb8e3e413cc70993a5e667e5b8

The piece of code I'm telling about hasn't changed
very much since then, it looks in the following way
in that commit:

  25 unsigned long
  26 arch_get_unmapped_area(struct file *filp, unsigned long addr,
  27         unsigned long len, unsigned long pgoff, unsigned long flags)
  28 {
  29     struct mm_struct *mm = current->mm;
  30     struct vm_area_struct *vma;
  31     unsigned long start_addr;
  32 #ifdef CONFIG_CPU_V6
  33     unsigned int cache_type;
  34     int do_align = 0, aliasing = 0;
  35
  36     /*
  37      * We only need to do colour alignment if either the I or D
  38      * caches alias.  This is indicated by bits 9 and 21 of the
  39      * cache type register.
  40      */
  41     cache_type = read_cpuid(CPUID_CACHETYPE);
  42     if (cache_type != read_cpuid(CPUID_ID)) {
  43         aliasing = (cache_type | cache_type >> 12) & (1 << 9);
  44         if (aliasing)
  45             do_align = filp || flags & MAP_SHARED;
  46     }


The last commit that touches the file is 
394ef6403abc36900d9303395a49a72d32666f2a
(http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commit;h=394ef6403abc36900d9303395a49a72d32666f2a)
and currently the lines look in the following way:

[arch/arm/mm/mmap.c]
[...]
54 unsigned long
55 arch_get_unmapped_area(struct file *filp, unsigned long addr,
56                 unsigned long len, unsigned long pgoff, unsigned long 
flags)
57 {
58         struct mm_struct *mm = current->mm;
59         struct vm_area_struct *vma;
60         int do_align = 0;
61         int aliasing = cache_is_vipt_aliasing();
62         struct vm_unmapped_area_info info;
63
64         /*
65          * We only need to do colour alignment if either the I or D
66          * caches alias.
67          */
68         if (aliasing)
69                 do_align = filp || (flags & MAP_SHARED);


Particularly, the lines

  68     if (aliasing)
  69         do_align = filp || (flags & MAP_SHARED);

haven't changed since then.


I intentionally provided a reference to a historical branch
to point out that this code hasn't changed.
Alexander Kartashov Jan. 31, 2013, 3:35 p.m. UTC | #3
On 01/31/2013 05:47 PM, Russell King - ARM Linux wrote:
> Err, no.  Try again

OK, let me be more specific. The following code is a simplified
version of the test in the CRIU test suite that checks IPC SHM
checkpoint/restoration work correctly:

#include <stdio.h>
#include <sys/ipc.h>

int main()
{
    /*
     * Create (or look up) a 40960-byte SHM segment under an arbitrary
     * fixed key.  Permission bits must accompany IPC_CREAT; the bare
     * IPC_CREAT used originally creates the segment with mode 0000,
     * which is only attachable when running as root (see shmget(2)).
     */
    int shmid = shmget(0x94646337, 40960, IPC_CREAT | 0600);
    if (shmid < 0) {
        perror("Failed to get a SHM descriptor");
        return 1;
    }

    void *addr1, *addr2;

    /* First attach: let the kernel choose the address. */
    addr1 = shmat(shmid, 0, 0);
    if (addr1 == (void*)-1) {
        perror("Failed to attach the SHM segment");
        return 2;
    }

    printf("Attached to %p\n", addr1);
    *((int*)addr1) = 1;

    shmdt(addr1);

    /*
     * Re-attach at the address the kernel itself just returned.  On ARM,
     * __ARCH_FORCE_SHMLBA makes do_shmat() reject any shmaddr that is
     * not SHMLBA-aligned, so this fails with EINVAL whenever the first
     * attach did not hand back a SHMLBA-aligned address -- the behavior
     * this test demonstrates.
     */
    addr2 = shmat(shmid, addr1, 0);
    if (addr2 == (void*)-1) {
        perror("Failed to re-attach the SHM");
        printf("The SHM segment was formerly attached to %p\n", addr1);
        return 3;
    }

    if (addr2 != addr1) {
        printf("The SHM segment address changed: was %p, now %p\n",
               addr1, addr2);
        return 4;
    }

    return 0;
}


I'm running this code in the Linux 3.7.5:

root@crtest:~/ipc-fail# cat /proc/version
Linux version 3.7.5 (alex@alex-pc) (gcc version 4.6.3 20120201 
(prerelease) (crosstool-NG linaro-1.13.1-2012.02-20120222 - Linaro GCC 
2012.02) ) #1 SMP Thu Jan 31 19:01:37 MSK 2013

I'm running the kernel in the QEMU model of the ARM Versatile
Express board for Cortex-A9.
I use the config vexpress_defconfig to compile the kernel.


The test usually outputs something like:

root@crtest:~/ipc-fail# ./ipc-fail
Attached to 0x76e22000
Failed to re-attach the SHM: Invalid argument
The SHM segment was formerly attached to 0x76e22000
root@crtest:~/ipc-fail# ./ipc-fail
Attached to 0x76df6000
Failed to re-attach the SHM: Invalid argument
The SHM segment was formerly attached to 0x76df6000

However it sometimes succeeds:

root@crtest:~/ipc-fail# ./ipc-fail
Attached to 0x76e94000

As you can see the test fails when the address returned by
the function shmat() isn't SHMLBA-aligned.


I think that this is caused by the fact that the kernel function do_shmat()
requires the argument shmaddr to be SHMLBA-aligned:

[ipc/shm.c]
[...]
  958 long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong 
*raddr,
  959           unsigned long shmlba)
  960 {
[...]
  974
  975     err = -EINVAL;
  976     if (shmid < 0)
  977         goto out;
  978     else if ((addr = (ulong)shmaddr)) {
  979         if (addr & (shmlba - 1)) {
  980             if (shmflg & SHM_RND)
  981                 addr &= ~(shmlba - 1);     /* round down */
  982             else
  983 #ifndef __ARCH_FORCE_SHMLBA
  984                 if (addr & ~PAGE_MASK)
  985 #endif
  986                     goto out;



However, it can't guarantee that its returned address is SHMLBA-aligned
because the function arch_get_unmapped_area():

[arch/arm/mm/mmap.c]
[...]
  66 unsigned long
  67 arch_get_unmapped_area(struct file *filp, unsigned long addr,
  68         unsigned long len, unsigned long pgoff, unsigned long flags)
  69 {
  70     struct mm_struct *mm = current->mm;
  71     struct vm_area_struct *vma;
  72     unsigned long start_addr;
  73     int do_align = 0;
  74     int aliasing = cache_is_vipt_aliasing();
  75
  76     /*
  77      * We only need to do colour alignment if either the I or D
  78      * caches alias.
  79      */
  80     if (aliasing)
  81         do_align = filp || (flags & MAP_SHARED);
[...]
114 full_search:
115     if (do_align)
116         addr = COLOUR_ALIGN(addr, pgoff);
117     else
118         addr = PAGE_ALIGN(addr);
119
120     for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
121         /* At this point:  (!vma || addr < vma->vm_end). */
122         if (TASK_SIZE - len < addr) {
123             /*
124              * Start a new search - just in case we missed
125              * some holes.
126              */
127             if (start_addr != TASK_UNMAPPED_BASE) {
128                 start_addr = addr = TASK_UNMAPPED_BASE;
129                 mm->cached_hole_size = 0;
130                 goto full_search;
131             }
132             return -ENOMEM;
133         }
134         if (!vma || addr + len <= vma->vm_start) {
135             /*
136              * Remember the place where we stopped the search:
137              */
138             mm->free_area_cache = addr + len;
139             return addr;
140         }
141         if (addr + mm->cached_hole_size < vma->vm_start)
142                 mm->cached_hole_size = vma->vm_start - addr;
143         addr = vma->vm_end;
144         if (do_align)
145             addr = COLOUR_ALIGN(addr, pgoff);
146     }
147 }

aligns the returned address on a SHMLBA-boundary only if I or D caches alias
as the comment reads:

  76     /*
  77      * We only need to do colour alignment if either the I or D
  78      * caches alias.
  79      */

that isn't true for ARMv7.


So the question is whether it's possible to align a SHM segment
on a SHMLBA boundary unconditionally.
diff mbox

Patch

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 1939c90..5b121d8 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -67,6 +67,8 @@  unsigned int processor_id;
  EXPORT_SYMBOL(processor_id);
  unsigned int __machine_arch_type;
  EXPORT_SYMBOL(__machine_arch_type);
+unsigned int cacheid;
+EXPORT_SYMBOL(cacheid);

  unsigned int __atags_pointer __initdata;

@@ -229,6 +231,25 @@  int cpu_architecture(void)
         return cpu_arch;
  }

+static void __init cacheid_init(void)
+{
+       unsigned int cachetype = read_cpuid_cachetype();
+       unsigned int arch = cpu_architecture();
+
+       if (arch >= CPU_ARCH_ARMv7) {
+               cacheid = CACHEID_VIPT_NONALIASING;
+               if ((cachetype & (3 << 14)) == 1 << 14)
+                       cacheid |= CACHEID_ASID_TAGGED;
+       } else if (arch >= CPU_ARCH_ARMv6) {
+               if (cachetype & (1 << 23))
+                       cacheid = CACHEID_VIPT_ALIASING;
+               else
+                       cacheid = CACHEID_VIPT_NONALIASING;
+       } else {
+               cacheid = CACHEID_VIVT;
+       }
+}
+
  /*
   * These functions re-use the assembly code in head.S, which