Message ID | 1412933657-52641-2-git-send-email-aishchuk@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | Rejected |
Headers | show |
> -----Original Message----- > From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma- > owner@vger.kernel.org] On Behalf Of Alexey Ishchuk > Sent: Friday, October 10, 2014 12:34 PM > To: linux-rdma@vger.kernel.org > Cc: blaschka@linux.vnet.ibm.com; schwidefsky@de.ibm.com; > gmuelas@de.ibm.com; utz.bacher@de.ibm.com; roland@kernel.org; Yishai > Hadas; Alexey Ishchuk; Alexey Ishchuk > Subject: [PATCH 1/3] s390/kernel: add system calls for access PCI memory > > Add the new __NR_s390_pci_mmio_write and __NR_s390_pci_mmio_read > system calls to allow user space applications to access device PCI I/O > memory pages on s390x platform. > > Signed-off-by: Alexey Ishchuk <alexey_ishchuk@ru.ibm.com> > --- > arch/s390/include/uapi/asm/unistd.h | 4 +- > arch/s390/kernel/Makefile | 1 + > arch/s390/kernel/entry.h | 4 + > arch/s390/kernel/pci_mmio.c | 207 > ++++++++++++++++++++++++++++++++++++ > arch/s390/kernel/syscalls.S | 2 + > 5 files changed, 217 insertions(+), 1 deletion(-) > create mode 100644 arch/s390/kernel/pci_mmio.c > > diff --git a/arch/s390/include/uapi/asm/unistd.h > b/arch/s390/include/uapi/asm/unistd.h > index 3802d2d..ab49d1d 100644 > --- a/arch/s390/include/uapi/asm/unistd.h > +++ b/arch/s390/include/uapi/asm/unistd.h > @@ -283,7 +283,9 @@ > #define __NR_sched_setattr 345 > #define __NR_sched_getattr 346 > #define __NR_renameat2 347 > -#define NR_syscalls 348 > +#define __NR_s390_pci_mmio_write 348 > +#define __NR_s390_pci_mmio_read 349 > +#define NR_syscalls 350 > > /* > * There are some system calls that are not present on 64 bit, some > diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile > index 33225e8..3e71b7e 100644 > --- a/arch/s390/kernel/Makefile > +++ b/arch/s390/kernel/Makefile > @@ -60,6 +60,7 @@ ifdef CONFIG_64BIT > obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o > \ > perf_cpum_cf_events.o > obj-y += runtime_instr.o cache.o > +obj-y += pci_mmio.o > endif > > # vdso > diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h > index 6ac7819..a36b6f9 100644 > --- a/arch/s390/kernel/entry.h > +++ b/arch/s390/kernel/entry.h > @@ -70,4 +70,8 @@ struct old_sigaction; > long sys_s390_personality(unsigned int personality); > long sys_s390_runtime_instr(int command, int signum); > > +long sys_s390_pci_mmio_write(const unsigned long mmio_addr, > + const void *user_buffer, const size_t length); > +long sys_s390_pci_mmio_read(const unsigned long mmio_addr, > + void *user_buffer, const size_t length); > #endif /* _ENTRY_H */ > diff --git a/arch/s390/kernel/pci_mmio.c b/arch/s390/kernel/pci_mmio.c > new file mode 100644 > index 0000000..f318207 > --- /dev/null > +++ b/arch/s390/kernel/pci_mmio.c > @@ -0,0 +1,207 @@ > +/* > + * Access to PCI I/O memory from user space programs. > + * > + * Copyright IBM Corp. 2014 > + * Author(s): Alexey Ishchuk <aishchuk@linux.vnet.ibm.com> > + */ > +#include <linux/kernel.h> > +#include <linux/syscalls.h> > +#include <linux/init.h> > +#include <linux/mm.h> > +#include <linux/errno.h> > +#include <linux/pci.h> > + > +union value_buffer { > + u8 buf8; > + u16 buf16; > + u32 buf32; > + u64 buf64; > + u8 buf_large[64]; > +}; > + > +static long get_pfn(const unsigned long user_addr, > + const unsigned long access, > + unsigned long *pfn) > +{ > + struct vm_area_struct *vma = NULL; > + long ret = 0L; > + > + if (!pfn) > + return -EINVAL; > + > + down_read(¤t->mm->mmap_sem); > + vma = find_vma(current->mm, user_addr); > + if (vma) { > + if (!(vma->vm_flags & access)) > + ret = -EACCES; > + else > + ret = follow_pfn(vma, user_addr, pfn); > + } else { > + ret = -EINVAL; > + } > + up_read(¤t->mm->mmap_sem); > + > + return ret; > +} > + > +static inline int verify_page_addr(const unsigned long page_addr) > +{ > + return !(page_addr < ZPCI_IOMAP_ADDR_BASE || > + page_addr > (ZPCI_IOMAP_ADDR_BASE | ZPCI_IOMAP_ADDR_IDX_MASK)); > +} > + > +static long choose_buffer(const size_t length, > + union value_buffer *value, > + void **buf) > +{ > + long ret = 0L; > + > + if (length > sizeof(value->buf_large)) { > + *buf = kmalloc(length, GFP_KERNEL); > + if (!*buf) > + return -ENOMEM; > + ret = 1; > + } else { > + *buf = value->buf_large; > + } > + return ret; > +} > + > +SYSCALL_DEFINE3(s390_pci_mmio_write, > + const unsigned long, mmio_addr, > + const void __user *, user_buffer, > + const size_t, length) > +{ > + long ret = 0L; > + void *buf = NULL; > + long buf_allocated = 0; > + void __iomem *io_addr = NULL; > + unsigned long pfn = 0UL; > + unsigned long offset = 0UL; > + unsigned long page_addr = 0UL; > + union value_buffer value; > + > + if (!length) > + return -EINVAL; > + if (!zpci_is_enabled()) > + return -ENODEV; > + > + ret = get_pfn(mmio_addr, VM_WRITE, &pfn); > + if (ret) > + return ret; > + > + page_addr = pfn << PAGE_SHIFT; > + if (!verify_page_addr(page_addr)) > + return -EFAULT; > + > + offset = mmio_addr & ~PAGE_MASK; > + if (offset + length > PAGE_SIZE) > + return -EINVAL; > + io_addr = (void *)(page_addr | offset); > + > + buf_allocated = choose_buffer(length, &value, &buf); > + if (buf_allocated < 0L) > + return -ENOMEM; > + > + switch (length) { > + case 1: > + ret = get_user(value.buf8, ((u8 *)user_buffer)); This cast (and similar casts across the code) kills the __user annotation of the user buffer pointer. First - fix this to help various static verification tools such as sparse work on your code. Second - are you sure this switch-case block achieves any performance gain compared to always using copy_from_user? If so, why not just push it into the S390 copy from user code? > + break; > + case 2: > + ret = get_user(value.buf16, ((u16 *)user_buffer)); > + break; > + case 4: > + ret = get_user(value.buf32, ((u32 *)user_buffer)); > + break; > + case 8: > + ret = get_user(value.buf64, ((u64 *)user_buffer)); > + break; > + default: > + ret = copy_from_user(buf, user_buffer, length); > + } > + if (ret) > + goto out; > + > + switch (length) { > + case 1: > + __raw_writeb(value.buf8, io_addr); > + break; > + case 2: > + __raw_writew(value.buf16, io_addr); > + break; > + case 4: > + __raw_writel(value.buf32, io_addr); > + break; > + case 8: > + __raw_writeq(value.buf64, io_addr); > + break; > + default: > + memcpy_toio(io_addr, buf, length); > + } > +out: > + if (buf_allocated > 0L) > + kfree(buf); > + return ret; > +} > + > +SYSCALL_DEFINE3(s390_pci_mmio_read, > + const unsigned long, mmio_addr, > + void __user *, user_buffer, > + const size_t, length) > +{ > + long ret = 0L; > + void *buf = NULL; > + long buf_allocated = 0L; > + void __iomem *io_addr = NULL; > + unsigned long pfn = 0UL; > + unsigned long offset = 0UL; > + unsigned long page_addr = 0UL; > + union value_buffer value; > + > + if (!length) > + return -EINVAL; > + if (!zpci_is_enabled()) > + return -ENODEV; > + > + ret = get_pfn(mmio_addr, VM_READ, &pfn); > + if (ret) > + return ret; > + > + page_addr = pfn << PAGE_SHIFT; > + if (!verify_page_addr(page_addr)) > + return -EFAULT; > + > + offset = mmio_addr & ~PAGE_MASK; > + if (offset + length > PAGE_SIZE) > + return -EINVAL; > + io_addr = (void *)(page_addr | offset); > + > + buf_allocated = choose_buffer(length, &value, &buf); > + if (buf_allocated < 0L) > + return -ENOMEM; > + > + switch (length) { > + case 1: > + value.buf8 = __raw_readb(io_addr); > + ret = put_user(value.buf8, ((u8 *)user_buffer)); Add __user annotations in this code block as well. > + break; > + case 2: > + value.buf16 = __raw_readw(io_addr); > + ret = put_user(value.buf16, ((u16 *)user_buffer)); > + break; > + case 4: > + value.buf32 = __raw_readl(io_addr); > + ret = put_user(value.buf32, ((u32 *)user_buffer)); > + break; > + case 8: > + value.buf64 = __raw_readq(io_addr); > + ret = put_user(value.buf64, ((u64 *)user_buffer)); > + break; > + default: > + memcpy_fromio(buf, io_addr, length); > + ret = copy_to_user(user_buffer, buf, length); > + } > + if (buf_allocated > 0L) > + kfree(buf); > + return ret; > +} > diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S > index fe5cdf2..1faa942 100644 > --- a/arch/s390/kernel/syscalls.S > +++ b/arch/s390/kernel/syscalls.S > @@ -356,3 +356,5 @@ > SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module) > SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) > /* 345 */ > SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr) > SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2) > +SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_write,sys_ni_syscall) > +SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_read,sys_ni_syscall) Generally speaking, looks OK once the __user annotation is added. I suspect you might need ack/review from the S390 maintainer as well for this to be pushed, as the syscall is generic to the entire S390 subsystem. --Shachar -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, 12 Oct 2014 11:52:55 +0000 Shachar Raindel <raindel@mellanox.com> wrote: > > + switch (length) { > > + case 1: > > + ret = get_user(value.buf8, ((u8 *)user_buffer)); > > This cast (and similar casts across the code) kills the __user > annotation of the user buffer pointer. > First - fix this to help various static verification tools such > as sparse work on your code. > Second - are you sure this switch-case block achieves any > performance gain compared to always using copy_from_user? > If so, why not just push it into the S390 copy from user code? The __user annotation is indeed missing. If the switch is improving performance needs to be seen, with the compile options set for z10 the get_user is inlined while the copy_from_user calls a function. For compiles < z10 all 5 switch cases will call the same __copy_from_user function. So it depends, as long as the switch is correct I am ok the code block for now. > > + switch (length) { > > + case 1: > > + value.buf8 = __raw_readb(io_addr); > > + ret = put_user(value.buf8, ((u8 *)user_buffer)); > > Add __user annotations in this code block as well. Yes, please add. > Generally speaking, looks OK once the __user annotation is added. > > I suspect you might need ack/review from the S390 maintainer as > well for this to be pushed, as the syscall is generic to the > entire S390 subsystem. With the missing __user annotations added: Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 3802d2d..ab49d1d 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -283,7 +283,9 @@ #define __NR_sched_setattr 345 #define __NR_sched_getattr 346 #define __NR_renameat2 347 -#define NR_syscalls 348 +#define __NR_s390_pci_mmio_write 348 +#define __NR_s390_pci_mmio_read 349 +#define NR_syscalls 350 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 33225e8..3e71b7e 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -60,6 +60,7 @@ ifdef CONFIG_64BIT obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \ perf_cpum_cf_events.o obj-y += runtime_instr.o cache.o +obj-y += pci_mmio.o endif # vdso diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 6ac7819..a36b6f9 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -70,4 +70,8 @@ struct old_sigaction; long sys_s390_personality(unsigned int personality); long sys_s390_runtime_instr(int command, int signum); +long sys_s390_pci_mmio_write(const unsigned long mmio_addr, + const void *user_buffer, const size_t length); +long sys_s390_pci_mmio_read(const unsigned long mmio_addr, + void *user_buffer, const size_t length); #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/pci_mmio.c b/arch/s390/kernel/pci_mmio.c new file mode 100644 index 0000000..f318207 --- /dev/null +++ b/arch/s390/kernel/pci_mmio.c @@ -0,0 +1,207 @@ +/* + * Access to PCI I/O memory from user space programs. + * + * Copyright IBM Corp. 2014 + * Author(s): Alexey Ishchuk <aishchuk@linux.vnet.ibm.com> + */ +#include <linux/kernel.h> +#include <linux/syscalls.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/pci.h> + +union value_buffer { + u8 buf8; + u16 buf16; + u32 buf32; + u64 buf64; + u8 buf_large[64]; +}; + +static long get_pfn(const unsigned long user_addr, + const unsigned long access, + unsigned long *pfn) +{ + struct vm_area_struct *vma = NULL; + long ret = 0L; + + if (!pfn) + return -EINVAL; + + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, user_addr); + if (vma) { + if (!(vma->vm_flags & access)) + ret = -EACCES; + else + ret = follow_pfn(vma, user_addr, pfn); + } else { + ret = -EINVAL; + } + up_read(¤t->mm->mmap_sem); + + return ret; +} + +static inline int verify_page_addr(const unsigned long page_addr) +{ + return !(page_addr < ZPCI_IOMAP_ADDR_BASE || + page_addr > (ZPCI_IOMAP_ADDR_BASE | ZPCI_IOMAP_ADDR_IDX_MASK)); +} + +static long choose_buffer(const size_t length, + union value_buffer *value, + void **buf) +{ + long ret = 0L; + + if (length > sizeof(value->buf_large)) { + *buf = kmalloc(length, GFP_KERNEL); + if (!*buf) + return -ENOMEM; + ret = 1; + } else { + *buf = value->buf_large; + } + return ret; +} + +SYSCALL_DEFINE3(s390_pci_mmio_write, + const unsigned long, mmio_addr, + const void __user *, user_buffer, + const size_t, length) +{ + long ret = 0L; + void *buf = NULL; + long buf_allocated = 0; + void __iomem *io_addr = NULL; + unsigned long pfn = 0UL; + unsigned long offset = 0UL; + unsigned long page_addr = 0UL; + union value_buffer value; + + if (!length) + return -EINVAL; + if (!zpci_is_enabled()) + return -ENODEV; + + ret = get_pfn(mmio_addr, VM_WRITE, &pfn); + if (ret) + return ret; + + page_addr = pfn << PAGE_SHIFT; + if (!verify_page_addr(page_addr)) + return -EFAULT; + + offset = mmio_addr & ~PAGE_MASK; + if (offset + length > PAGE_SIZE) + return -EINVAL; + io_addr = (void *)(page_addr | offset); + + buf_allocated = choose_buffer(length, &value, &buf); + if (buf_allocated < 0L) + return -ENOMEM; + + switch (length) { + case 1: + ret = get_user(value.buf8, ((u8 *)user_buffer)); + break; + case 2: + ret = get_user(value.buf16, ((u16 *)user_buffer)); + break; + case 4: + ret = get_user(value.buf32, ((u32 *)user_buffer)); + break; + case 8: + ret = get_user(value.buf64, ((u64 *)user_buffer)); + break; + default: + ret = copy_from_user(buf, user_buffer, length); + } + if (ret) + goto out; + + switch (length) { + case 1: + __raw_writeb(value.buf8, io_addr); + break; + case 2: + __raw_writew(value.buf16, io_addr); + break; + case 4: + __raw_writel(value.buf32, io_addr); + break; + case 8: + __raw_writeq(value.buf64, io_addr); + break; + default: + memcpy_toio(io_addr, buf, length); + } +out: + if (buf_allocated > 0L) + kfree(buf); + return ret; +} + +SYSCALL_DEFINE3(s390_pci_mmio_read, + const unsigned long, mmio_addr, + void __user *, user_buffer, + const size_t, length) +{ + long ret = 0L; + void *buf = NULL; + long buf_allocated = 0L; + void __iomem *io_addr = NULL; + unsigned long pfn = 0UL; + unsigned long offset = 0UL; + unsigned long page_addr = 0UL; + union value_buffer value; + + if (!length) + return -EINVAL; + if (!zpci_is_enabled()) + return -ENODEV; + + ret = get_pfn(mmio_addr, VM_READ, &pfn); + if (ret) + return ret; + + page_addr = pfn << PAGE_SHIFT; + if (!verify_page_addr(page_addr)) + return -EFAULT; + + offset = mmio_addr & ~PAGE_MASK; + if (offset + length > PAGE_SIZE) + return -EINVAL; + io_addr = (void *)(page_addr | offset); + + buf_allocated = choose_buffer(length, &value, &buf); + if (buf_allocated < 0L) + return -ENOMEM; + + switch (length) { + case 1: + value.buf8 = __raw_readb(io_addr); + ret = put_user(value.buf8, ((u8 *)user_buffer)); + break; + case 2: + value.buf16 = __raw_readw(io_addr); + ret = put_user(value.buf16, ((u16 *)user_buffer)); + break; + case 4: + value.buf32 = __raw_readl(io_addr); + ret = put_user(value.buf32, ((u32 *)user_buffer)); + break; + case 8: + value.buf64 = __raw_readq(io_addr); + ret = put_user(value.buf64, ((u64 *)user_buffer)); + break; + default: + memcpy_fromio(buf, io_addr, length); + ret = copy_to_user(user_buffer, buf, length); + } + if (buf_allocated > 0L) + kfree(buf); + return ret; +} diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index fe5cdf2..1faa942 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -356,3 +356,5 @@ SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module) SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */ SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr) SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2) +SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_write,sys_ni_syscall) +SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_read,sys_ni_syscall)
Add the new __NR_s390_pci_mmio_write and __NR_s390_pci_mmio_read system calls to allow user space applications to access device PCI I/O memory pages on s390x platform. Signed-off-by: Alexey Ishchuk <alexey_ishchuk@ru.ibm.com> --- arch/s390/include/uapi/asm/unistd.h | 4 +- arch/s390/kernel/Makefile | 1 + arch/s390/kernel/entry.h | 4 + arch/s390/kernel/pci_mmio.c | 207 ++++++++++++++++++++++++++++++++++++ arch/s390/kernel/syscalls.S | 2 + 5 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 arch/s390/kernel/pci_mmio.c