diff mbox

[v2,17/31] arm64: System calls handling

Message ID 1344966752-16102-18-git-send-email-catalin.marinas@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Catalin Marinas Aug. 14, 2012, 5:52 p.m. UTC
This patch adds support for system calls coming from 64-bit
applications. It uses the asm-generic/unistd.h definitions with the
canonical set of system calls. The private system calls are only used
for 32-bit (compat) applications as 64-bit ones can set the TLS and
flush the caches entirely from user space.

The sys_call_table is just an array defined in a C file and it contains
pointers to the syscall functions. The array is 4KB aligned to allow the
use of the ADRP instruction (longer range ADR) in entry.S.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/stat.h     |   63 +++++++++++++++++
 arch/arm64/include/asm/statfs.h   |   23 ++++++
 arch/arm64/include/asm/syscalls.h |   40 +++++++++++
 arch/arm64/include/asm/unistd.h   |   27 +++++++
 arch/arm64/kernel/sys.c           |  138 +++++++++++++++++++++++++++++++++++++
 5 files changed, 291 insertions(+), 0 deletions(-)
 create mode 100644 arch/arm64/include/asm/stat.h
 create mode 100644 arch/arm64/include/asm/statfs.h
 create mode 100644 arch/arm64/include/asm/syscalls.h
 create mode 100644 arch/arm64/include/asm/unistd.h
 create mode 100644 arch/arm64/kernel/sys.c

Comments

Arnd Bergmann Aug. 15, 2012, 2:22 p.m. UTC | #1
On Tuesday 14 August 2012, Catalin Marinas wrote:

> +
> +/* This matches struct stat64 in glibc2.1, hence the absolutely
> + * insane amounts of padding around dev_t's.
> + * Note: The kernel zero's the padded region because glibc might read them
> + * in the hope that the kernel has stretched to using larger sizes.
> + */
> +struct stat64 {
> +	compat_u64	st_dev;
> +	unsigned char   __pad0[4];
> +
> +#define STAT64_HAS_BROKEN_ST_INO	1
> +	compat_ulong_t	__st_ino;
> +	compat_uint_t	st_mode;
> +	compat_uint_t	st_nlink;
> +
> +	compat_ulong_t	st_uid;
> +	compat_ulong_t	st_gid;
> +
> +	compat_u64	st_rdev;
> +	unsigned char   __pad3[4];
> +
> +	compat_s64	st_size;
> +	compat_ulong_t	st_blksize;
> +	compat_u64	st_blocks;	/* Number 512-byte blocks allocated. */
> +
> +	compat_ulong_t	st_atime;
> +	compat_ulong_t	st_atime_nsec;
> +
> +	compat_ulong_t	st_mtime;
> +	compat_ulong_t	st_mtime_nsec;
> +
> +	compat_ulong_t	st_ctime;
> +	compat_ulong_t	st_ctime_nsec;
> +
> +	compat_u64	st_ino;
> +};

The comment above struct stat64 is completely irrelevant here. I would instead
explain why you need your own stat64 in the first place.

> +int kernel_execve(const char *filename,
> +		  const char *const argv[],
> +		  const char *const envp[])
> +{
> +	struct pt_regs regs;
> +	int ret;
> +
> +	memset(&regs, 0, sizeof(struct pt_regs));
> +	ret = do_execve(filename,
> +			(const char __user *const __user *)argv,
> +			(const char __user *const __user *)envp, &regs);
> +	if (ret < 0)
> +		goto out;
> +
> +	/*
> +	 * Save argc to the register structure for userspace.
> +	 */
> +	regs.regs[0] = ret;
> +
> +	/*
> +	 * We were successful.  We won't be returning to our caller, but
> +	 * instead to user space by manipulating the kernel stack.
> +	 */
> +	asm(	"add	x0, %0, %1\n\t"
> +		"mov	x1, %2\n\t"
> +		"mov	x2, %3\n\t"
> +		"bl	memmove\n\t"	/* copy regs to top of stack */
> +		"mov	x27, #0\n\t"	/* not a syscall */
> +		"mov	x28, %0\n\t"	/* thread structure */
> +		"mov	sp, x0\n\t"	/* reposition stack pointer */
> +		"b	ret_to_user"
> +		:
> +		: "r" (current_thread_info()),
> +		  "Ir" (THREAD_START_SP - sizeof(regs)),
> +		  "r" (&regs),
> +		  "Ir" (sizeof(regs))
> +		: "x0", "x1", "x2", "x27", "x28", "x30", "memory");
> +
> + out:
> +	return ret;
> +}
> +EXPORT_SYMBOL(kernel_execve);

Al Viro was recently talking about a generic implementation of execve.
I can't find that now, but I think you should use that.

> +
> +asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
> +			 unsigned long prot, unsigned long flags,
> +			 unsigned long fd, off_t off)
> +{
> +	if (offset_in_page(off) != 0)
> +		return -EINVAL;
> +
> +	return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
> +}
> +
> +/*
> + * Wrappers to pass the pt_regs argument.
> + */
> +#define sys_execve		sys_execve_wrapper
> +#define sys_clone		sys_clone_wrapper
> +#define sys_rt_sigreturn	sys_rt_sigreturn_wrapper
> +#define sys_sigaltstack		sys_sigaltstack_wrapper

I think

#define sys_mmap sys_mmap_pgoff 

would be more appropriate than defining your own sys_mmap function here.
We should probably make that the default in asm-generic/unistd.h and
change the architectures that have their own implementation to override
it.

	Arnd
Catalin Marinas Aug. 21, 2012, 5:51 p.m. UTC | #2
On Wed, Aug 15, 2012 at 03:22:16PM +0100, Arnd Bergmann wrote:
> On Tuesday 14 August 2012, Catalin Marinas wrote:
> 
> > +
> > +/* This matches struct stat64 in glibc2.1, hence the absolutely
> > + * insane amounts of padding around dev_t's.
> > + * Note: The kernel zero's the padded region because glibc might read them
> > + * in the hope that the kernel has stretched to using larger sizes.
> > + */
> > +struct stat64 {
> > +	compat_u64	st_dev;
> > +	unsigned char   __pad0[4];
> 
> The comment above struct stat64 is completely irrelevant here. I would instead
> explain why you need your own stat64 in the first place.

OK, I added a comment. It's only needed for compat.

> > +int kernel_execve(const char *filename,
> > +		  const char *const argv[],
> > +		  const char *const envp[])
> 
> Al Viro was recently talking about a generic implementation of execve.
> I can't find that now, but I think you should use that.

I've seen these but I'm waiting for the generic sys_execve and
kernel_execve to get into mainline before switching arch/arm64 to them.

> > +asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
> > +			 unsigned long prot, unsigned long flags,
> > +			 unsigned long fd, off_t off)
> > +{
> > +	if (offset_in_page(off) != 0)
> > +		return -EINVAL;
> > +
> > +	return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
> > +}
> 
> I think
> 
> #define sys_mmap sys_mmap_pgoff 

There are slightly different semantics with the last argument of
sys_mmap() which takes a byte offset. The sys_mmap_pgoff() function
takes the offset shifted by PAGE_SHIFT (which is the same as sys_mmap2).

Looking at the other architectures, it makes sense to use a generic
sys_mmap() implementation similar to the one above (or the ia-64 one,
which seems to be the most complete).
Arnd Bergmann Aug. 21, 2012, 8:14 p.m. UTC | #3
On Tuesday 21 August 2012, Catalin Marinas wrote:
> > > +asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
> > > +                    unsigned long prot, unsigned long flags,
> > > +                    unsigned long fd, off_t off)
> > > +{
> > > +   if (offset_in_page(off) != 0)
> > > +           return -EINVAL;
> > > +
> > > +   return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
> > > +}
> > 
> > I think
> > 
> > #define sys_mmap sys_mmap_pgoff 
> 
> There are slightly different semantics with the last argument of
> sys_mmap() which takes a byte offset. The sys_mmap_pgoff() function
> takes the offset shifted by PAGE_SHIFT (which is the same as sys_mmap2).
> 
> Looking at the other architectures, it makes sense to use a generic
> sys_mmap() implementation similar to the one above (or the ia-64, seems
> to be the most complete).
> 

Why that? The generic sys_mmap_pgoff was specifically added so new architectures
could just use that instead of having their own wrappers, see f8b72560.

	Arnd
Catalin Marinas Aug. 21, 2012, 10:01 p.m. UTC | #4
On Tue, Aug 21, 2012 at 09:14:01PM +0100, Arnd Bergmann wrote:
> On Tuesday 21 August 2012, Catalin Marinas wrote:
> > > > +asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
> > > > +                    unsigned long prot, unsigned long flags,
> > > > +                    unsigned long fd, off_t off)
> > > > +{
> > > > +   if (offset_in_page(off) != 0)
> > > > +           return -EINVAL;
> > > > +
> > > > +   return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
> > > > +}
> > > 
> > > I think
> > > 
> > > #define sys_mmap sys_mmap_pgoff 
> > 
> > There are slightly different semantics with the last argument of
> > sys_mmap() which takes a byte offset. The sys_mmap_pgoff() function
> > takes the offset shifted by PAGE_SHIFT (which is the same as sys_mmap2).
> > 
> > Looking at the other architectures, it makes sense to use a generic
> > sys_mmap() implementation similar to the one above (or the ia-64, seems
> > to be the most complete).
> 
> Why that? The generic sys_mmap_pgoff was specifically added so new architectures
> could just use that instead of having their own wrappers, see f8b72560.

As I understand, sys_mmap_pgoff can be used instead of sys_mmap2 on new
32-bit architectures. But on 64-bit architectures we don't have
sys_mmap2, only sys_mmap with the difference that the last argument is
the offset in bytes (and multiple of PAGE_SIZE) rather than in pages. So
unless we change the meaning of this last argument for sys_mmap, we
cannot just define it to sys_mmap_pgoff.

Since the other 64-bit architectures seem to have a sys_mmap wrapper
that does this:

	sys_mmap_pgoff(..., off >> PAGE_SHIFT);

I think AArch64 should also use the same sys_mmap convention. We can
make this wrapper generic.
Arnd Bergmann Aug. 22, 2012, 7:56 a.m. UTC | #5
On Tuesday 21 August 2012, Catalin Marinas wrote:
> As I understand, sys_mmap_pgoff can be used instead of sys_mmap2 on new
> 32-bit architectures. But on 64-bit architectures we don't have
> sys_mmap2, only sys_mmap with the difference that the last argument is
> the offset in bytes (and multiple of PAGE_SIZE) rather than in pages. So
> unless we change the meaning of this last argument for sys_mmap, we
> cannot just define it to sys_mmap_pgoff.
> 
> Since the other 64-bit architectures seem to have a sys_mmap wrapper
> that does this:
> 
>         sys_mmap_pgoff(..., off >> PAGE_SHIFT);
> 
> I think AArch64 should also use the same sys_mmap convention. We can
> make this wrapper generic.

But the wrapper can just as well be part of glibc, which already has
one. There is no reason for the kernel to export two generic interfaces
for mmap when one of them only works on 64 bit and the other one is
good for both 32 and 64 bit.

All the other 64 bit architectures (besides tile) were added to the
kernel before we had sys_mmap_pgoff.

	Arnd
Catalin Marinas Aug. 22, 2012, 10:29 a.m. UTC | #6
On Wed, Aug 22, 2012 at 08:56:30AM +0100, Arnd Bergmann wrote:
> On Tuesday 21 August 2012, Catalin Marinas wrote:
> > As I understand, sys_mmap_pgoff can be used instead of sys_mmap2 on new
> > 32-bit architectures. But on 64-bit architectures we don't have
> > sys_mmap2, only sys_mmap with the difference that the last argument is
> > the offset in bytes (and multiple of PAGE_SIZE) rather than in pages. So
> > unless we change the meaning of this last argument for sys_mmap, we
> > cannot just define it to sys_mmap_pgoff.
> > 
> > Since the other 64-bit architectures seem to have a sys_mmap wrapper
> > that does this:
> > 
> >         sys_mmap_pgoff(..., off >> PAGE_SHIFT);
> > 
> > I think AArch64 should also use the same sys_mmap convention. We can
> > make this wrapper generic.
> 
> But the wrapper can just as well be part of glibc, which already has
> one. There is no reason for the kernel to export two generic interfaces
> for mmap when one of them only works on 64 bit and the other one is
> good for both 32 and 64 bit.

The kernel only exports a single interface for 64-bit, that's
sys_mmap(). For compat we only export sys_mmap2() (which, of course,
would not work for 64-bit).

The generic prototypes for sys_mmap and sys_mmap2 are different with
regards to the last argument: off_t vs unsigned long. While in practice
it's the same size, off_t is used throughout the kernel as offset in
bytes rather than pages (hence the prototype change in sys_mmap2 and
sys_mmap_pgoff).

But what's more important - moving this wrapper to glibc causes issues
with the page size. We support both 4KB and 64KB pages on 64-bit systems
(the latter without compat support). The kernel is in a better position
to do the shift by a compile-time constant. Glibc would need to enquire
the actual page size to do the shift before calling sys_mmap_pgoff. If
we assume in glibc that the shift is always 12, we need another wrapper
in the kernel anyway for 64KB page configuration. So passing the offset
in bytes worked best for us.

> All the other 64 bit architectures (besides tile) were added to the
> kernel before we had sys_mmap_pgoff.

So there is no new 64-bit architecture that defines sys_mmap to
sys_mmap_pgoff. I don't think that AArch64 should introduce this, given
the restrictions I mentioned above. sys_mmap2/sys_mmap_pgoff are a way
to extend the file offset beyond 32-bit but that's not needed on a
64-bit system.
Arnd Bergmann Aug. 22, 2012, 12:27 p.m. UTC | #7
On Wednesday 22 August 2012, Catalin Marinas wrote:
> But what's more important - moving this wrapper to glibc causes issues
> with the page size. We support both 4KB and 64KB pages on 64-bit systems
> (the latter without compat support). The kernel is in a better position
> to do the shift by a compile-time constant. Glibc would need to enquire
> the actual page size to do the shift before calling sys_mmap_pgoff. If
> we assume in glibc that the shift is always 12, we need another wrapper
> in the kernel anyway for 64KB page configuration. So passing the offset
> in bytes worked best for us.

Right, the kernel interface should really be independent of the page
size, as sys_mmap2 normally is, and sys_mmap2 is not provided here.

	Arnd
Catalin Marinas Aug. 22, 2012, 5:13 p.m. UTC | #8
On Wed, Aug 22, 2012 at 01:27:14PM +0100, Arnd Bergmann wrote:
> On Wednesday 22 August 2012, Catalin Marinas wrote:
> > But what's more important - moving this wrapper to glibc causes issues
> > with the page size. We support both 4KB and 64KB pages on 64-bit systems
> > (the latter without compat support). The kernel is in a better position
> > to do the shift by a compile-time constant. Glibc would need to enquire
> > the actual page size to do the shift before calling sys_mmap_pgoff. If
> > we assume in glibc that the shift is always 12, we need another wrapper
> > in the kernel anyway for 64KB page configuration. So passing the offset
> > in bytes worked best for us.
> 
> Right, the kernel interface should really be independent of the page
> size, as sys_mmap2 normally is, and sys_mmap2 is not provided here.

sys_mmap2 is indeed independent of the page size on most architectures
assuming that the last argument represents the offset in units of 4096.
The cris and ia64 seem to differ (one being 8K, the other variable).

sys_mmap is also independent of the page size.

But using sys_mmap2 for a 64-bit architecture, especially when the page
size is not always 4K, does not bring any advantages. We end up doing a
shift by 12 in glibc and another shift by (PAGE_SHIFT - 12) in the
kernel wrapper. Unless I missed your point, I don't see the reason for
using sys_mmap2 on a 64-bit architecture, apart from it being newer (and
compat support should not have any relevance, we have different syscall
tables anyway).
Catalin Marinas Sept. 3, 2012, 11:48 a.m. UTC | #9
Hi Arnd,

On Wed, Aug 22, 2012 at 06:13:10PM +0100, Catalin Marinas wrote:
> On Wed, Aug 22, 2012 at 01:27:14PM +0100, Arnd Bergmann wrote:
> > On Wednesday 22 August 2012, Catalin Marinas wrote:
> > > But what's more important - moving this wrapper to glibc causes issues
> > > with the page size. We support both 4KB and 64KB pages on 64-bit systems
> > > (the latter without compat support). The kernel is in a better position
> > > to do the shift by a compile-time constant. Glibc would need to enquire
> > > the actual page size to do the shift before calling sys_mmap_pgoff. If
> > > we assume in glibc that the shift is always 12, we need another wrapper
> > > in the kernel anyway for 64KB page configuration. So passing the offset
> > > in bytes worked best for us.
> > 
> > Right, the kernel interface should really be independent of the page
> > size, as sys_mmap2 normally is, and sys_mmap2 is not provided here.
> 
> sys_mmap2 is indeed independent of the page size on most architectures
> assuming that the last argument represents the offset in units of 4096.
> The cris and ia64 seem to differ (one being 8K, the other variable).
> 
> sys_mmap is also independent of the page size.
> 
> But using sys_mmap2 for a 64-bit architecture, especially when the page
> size is not always 4K, does not bring any advantages. We end up doing a
> shift by 12 in glibc and another shift by (PAGE_SHIFT - 12) in the
> kernel wrapper. Unless I missed your point, I don't see the reason for
> using sys_mmap2 on a 64-bit architecture, apart from it being newer (and
> compat support should not have any relevance, we have different syscall
> tables anyway).

I forgot about this at the KS and we haven't got to a clear conclusion.

Do we (1) stick with the sys_mmap() for 64-bit systems and avoid offset
conversion in both glibc and kernel or (2) use sys_mmap2() with a 12
shift in glibc and (PAGE_SHIFT - 12) in the kernel wrapper?

I personally prefer (1) as it doesn't require a kernel wrapper and we
avoid the double shifting. A reason for (2) would be if we ever need
file offsets greater than 16EB.
Arnd Bergmann Sept. 3, 2012, 12:39 p.m. UTC | #10
On Monday 03 September 2012, Catalin Marinas wrote:
> I forgot about this at the KS and we haven't got to a clear conclusion.
> 
> Do we (1) stick with the sys_mmap() for 64-bit systems and avoid offset
> conversion in both glibc and kernel or (2) use sys_mmap2() with a 12
> shift in glibc and (PAGE_SHIFT - 12) in the kernel wrapper?
> 
> I personally prefer (1) as it doesn't require a kernel wrapper and we
> avoid the double shifting. 

Yes, I think it's ok this way.

> A reason for (2) would be if we ever need file offsets greater than 16EB.

Let's not worry about this for now, all the other architectures will
have the same problem when we get there.

	Arnd
diff mbox

Patch

diff --git a/arch/arm64/include/asm/stat.h b/arch/arm64/include/asm/stat.h
new file mode 100644
index 0000000..f63a680
--- /dev/null
+++ b/arch/arm64/include/asm/stat.h
@@ -0,0 +1,63 @@ 
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_STAT_H
+#define __ASM_STAT_H
+
+#include <asm-generic/stat.h>
+
+#if defined(__KERNEL__) && defined(CONFIG_AARCH32_EMULATION)
+
+#include <asm/compat.h>
+
+/* Compat-only (CONFIG_AARCH32_EMULATION) layout matching struct stat64 in
+ * glibc 2.1, hence the large amounts of padding around the dev_t fields.
+ * Note: The kernel zeroes the padded regions because glibc might read them
+ * in the hope that the kernel has stretched to using larger sizes.
+ */
+struct stat64 {
+	compat_u64	st_dev;
+	unsigned char   __pad0[4];
+
+#define STAT64_HAS_BROKEN_ST_INO	1
+	compat_ulong_t	__st_ino;
+	compat_uint_t	st_mode;
+	compat_uint_t	st_nlink;
+
+	compat_ulong_t	st_uid;
+	compat_ulong_t	st_gid;
+
+	compat_u64	st_rdev;
+	unsigned char   __pad3[4];
+
+	compat_s64	st_size;
+	compat_ulong_t	st_blksize;
+	compat_u64	st_blocks;	/* Number of 512-byte blocks allocated. */
+
+	compat_ulong_t	st_atime;
+	compat_ulong_t	st_atime_nsec;
+
+	compat_ulong_t	st_mtime;
+	compat_ulong_t	st_mtime_nsec;
+
+	compat_ulong_t	st_ctime;
+	compat_ulong_t	st_ctime_nsec;
+
+	compat_u64	st_ino;
+};
+
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/statfs.h b/arch/arm64/include/asm/statfs.h
new file mode 100644
index 0000000..6f62190
--- /dev/null
+++ b/arch/arm64/include/asm/statfs.h
@@ -0,0 +1,23 @@ 
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_STATFS_H
+#define __ASM_STATFS_H
+
+#define ARCH_PACK_COMPAT_STATFS64 __attribute__((packed,aligned(4)))
+
+#include <asm-generic/statfs.h>
+
+#endif
diff --git a/arch/arm64/include/asm/syscalls.h b/arch/arm64/include/asm/syscalls.h
new file mode 100644
index 0000000..09ff335
--- /dev/null
+++ b/arch/arm64/include/asm/syscalls.h
@@ -0,0 +1,40 @@ 
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SYSCALLS_H
+#define __ASM_SYSCALLS_H
+
+#include <linux/linkage.h>
+#include <linux/compiler.h>
+#include <linux/signal.h>
+
+/*
+ * System call wrappers implemented in kernel/entry.S.
+ */
+asmlinkage long sys_execve_wrapper(const char __user *filename,
+				   const char __user *const __user *argv,
+				   const char __user *const __user *envp);
+asmlinkage long sys_clone_wrapper(unsigned long clone_flags,
+				  unsigned long newsp,
+				  void __user *parent_tid,
+				  unsigned long tls_val,
+				  void __user *child_tid);
+asmlinkage long sys_rt_sigreturn_wrapper(void);
+asmlinkage long sys_sigaltstack_wrapper(const stack_t __user *uss,
+					stack_t __user *uoss);
+
+#include <asm-generic/syscalls.h>
+
+#endif	/* __ASM_SYSCALLS_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
new file mode 100644
index 0000000..b00718c
--- /dev/null
+++ b/arch/arm64/include/asm/unistd.h
@@ -0,0 +1,27 @@ 
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#if !defined(__ASM_UNISTD_H) || defined(__SYSCALL)
+#define __ASM_UNISTD_H
+
+#ifndef __SYSCALL_COMPAT
+#include <asm-generic/unistd.h>
+#endif
+
+#if defined(__KERNEL__) && defined(CONFIG_AARCH32_EMULATION)
+#include <asm/unistd32.h>
+#endif
+
+#endif /* __ASM_UNISTD_H */
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
new file mode 100644
index 0000000..905fcfb
--- /dev/null
+++ b/arch/arm64/kernel/sys.c
@@ -0,0 +1,138 @@ 
+/*
+ * AArch64-specific system calls implementation
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+
+/*
+ * Clone the calling task via do_fork(); returns -EINVAL for a misaligned sp.
+ */
+asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp,
+			  int __user *parent_tidptr, unsigned long tls_val,
+			  int __user *child_tidptr, struct pt_regs *regs)
+{
+	if (!newsp)
+		newsp = regs->sp;	/* no stack given: use sp from regs */
+	/* 16-byte aligned stack mandatory on AArch64 */
+	if (newsp & 15)
+		return -EINVAL;
+	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
+}
+
+/*
+ * sys_execve() executes a new program via getname() and do_execve().
+ */
+asmlinkage long sys_execve(const char __user *filenamei,
+			   const char __user *const __user *argv,
+			   const char __user *const __user *envp,
+			   struct pt_regs *regs)
+{
+	long error;
+	char * filename;
+
+	filename = getname(filenamei);
+	error = PTR_ERR(filename);	/* only used if IS_ERR() below is true */
+	if (IS_ERR(filename))
+		goto out;
+	error = do_execve(filename, argv, envp, regs);
+	putname(filename);		/* pairs with getname() above */
+out:
+	return error;
+}
+
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
+{
+	struct pt_regs regs;
+	int ret;
+
+	memset(&regs, 0, sizeof(struct pt_regs));
+	ret = do_execve(filename,
+			(const char __user *const __user *)argv,
+			(const char __user *const __user *)envp, &regs);
+	if (ret < 0)		/* exec failed: hand the error to the caller */
+		goto out;
+
+	/*
+	 * Save argc to the register structure for userspace.
+	 */
+	regs.regs[0] = ret;
+
+	/*
+	 * We were successful.  We won't be returning to our caller, but
+	 * instead to user space by manipulating the kernel stack.
+	 */
+	asm(	"add	x0, %0, %1\n\t"	/* x0 = memmove destination */
+		"mov	x1, %2\n\t"	/* x1 = source (&regs) */
+		"mov	x2, %3\n\t"	/* x2 = length (sizeof(regs)) */
+		"bl	memmove\n\t"	/* copy regs to top of stack */
+		"mov	x27, #0\n\t"	/* not a syscall */
+		"mov	x28, %0\n\t"	/* thread structure */
+		"mov	sp, x0\n\t"	/* reposition stack pointer */
+		"b	ret_to_user"
+		:
+		: "r" (current_thread_info()),
+		  "Ir" (THREAD_START_SP - sizeof(regs)),
+		  "r" (&regs),
+		  "Ir" (sizeof(regs))
+		: "x0", "x1", "x2", "x27", "x28", "x30", "memory");
+
+ out:
+	return ret;
+}
+EXPORT_SYMBOL(kernel_execve);
+
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
+			 unsigned long prot, unsigned long flags,
+			 unsigned long fd, off_t off)	/* off: offset in bytes */
+{
+	if (offset_in_page(off) != 0)	/* byte offset must be page aligned */
+		return -EINVAL;
+
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+}
+
+/*
+ * Wrappers to pass the pt_regs argument.
+ */
+#define sys_execve		sys_execve_wrapper
+#define sys_clone		sys_clone_wrapper
+#define sys_rt_sigreturn	sys_rt_sigreturn_wrapper
+#define sys_sigaltstack		sys_sigaltstack_wrapper
+
+#include <asm/syscalls.h>
+
+#undef __SYSCALL
+#define __SYSCALL(nr, sym)	[nr] = sym,
+
+/*
+ * The sys_call_table array must be 4K aligned so that kernel/entry.S can
+ * reach it with the ADRP instruction (longer range ADR).
+ */
+void *sys_call_table[__NR_syscalls] __aligned(4096) = {
+	[0 ... __NR_syscalls - 1] = sys_ni_syscall,	/* default for every slot */
+#include <asm/unistd.h>
+};