sh: Implement __get_user_u64() required for 64-bit get_user()
diff mbox series

Message ID 20200529174540.4189874-2-glaubitz@physik.fu-berlin.de
State New
Headers show
Series
  • sh: Implement __get_user_u64() required for 64-bit get_user()
Related show

Commit Message

John Paul Adrian Glaubitz May 29, 2020, 5:45 p.m. UTC
Trying to build the kernel with CONFIG_INFINIBAND_USER_ACCESS enabled fails

     ERROR: "__get_user_unknown" [drivers/infiniband/core/ib_uverbs.ko] undefined!

with on SH since the kernel misses a 64-bit implementation of get_user().

Implement the missing 64-bit get_user() as __get_user_u64(), matching the
already existing __put_user_u64() which implements the 64-bit put_user().

Signed-off-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
---
 arch/sh/include/asm/uaccess_32.h | 49 ++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

Comments

Geert Uytterhoeven May 31, 2020, 9:52 a.m. UTC | #1
Hi Adrian,

On Fri, May 29, 2020 at 7:46 PM John Paul Adrian Glaubitz
<glaubitz@physik.fu-berlin.de> wrote:
> Trying to build the kernel with CONFIG_INFINIBAND_USER_ACCESS enabled fails
>
>      ERROR: "__get_user_unknown" [drivers/infiniband/core/ib_uverbs.ko] undefined!
>
> with on SH since the kernel misses a 64-bit implementation of get_user().
>
> Implement the missing 64-bit get_user() as __get_user_u64(), matching the
> already existing __put_user_u64() which implements the 64-bit put_user().
>
> Signed-off-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>

Thanks for your patch!

> --- a/arch/sh/include/asm/uaccess_32.h
> +++ b/arch/sh/include/asm/uaccess_32.h
> @@ -26,6 +26,9 @@ do {                                                          \
>         case 4:                                                 \
>                 __get_user_asm(x, ptr, retval, "l");            \
>                 break;                                          \
> +       case 8:                                                 \
> +               __get_user_u64(x, ptr, retval);                 \
> +               break;                                          \
>         default:                                                \
>                 __get_user_unknown();                           \
>                 break;                                          \
> @@ -66,6 +69,52 @@ do {                                                 \
>
>  extern void __get_user_unknown(void);
>
> +#if defined(CONFIG_CPU_LITTLE_ENDIAN)
> +#define __get_user_u64(x, addr, err) \
> +({ \
> +__asm__ __volatile__( \
> +       "1:\n\t" \
> +       "mov.l  %2,%R1\n\t" \
> +       "mov.l  %T2,%S1\n\t" \
> +       "2:\n" \
> +       ".section       .fixup,\"ax\"\n" \
> +       "3:\n\t" \
> +       "mov    #0, %1\n\t" \

As this is the 64-bit variant, I think this single move should be
replaced by a double move:

       "mov  #0,%R1\n\t" \
       "mov  #0,%S1\n\t" \

Same for the big endian version below.

Disclaimer: uncompiled, untested, no SH assembler expert.

> +       "mov.l  4f, %0\n\t" \
> +       "jmp    @%0\n\t" \
> +       " mov   %3, %0\n\t" \
> +       ".balign        4\n" \
> +       "4:     .long   2b\n\t" \
> +       ".previous\n" \
> +       ".section       __ex_table,\"a\"\n\t" \
> +       ".long  1b, 3b\n\t" \
> +       ".previous" \
> +       :"=&r" (err), "=&r" (x) \
> +       :"m" (__m(addr)), "i" (-EFAULT), "0" (err)); })

Gr{oetje,eeting}s,

                        Geert
John Paul Adrian Glaubitz May 31, 2020, 9:54 a.m. UTC | #2
Hi Geert!

On 5/31/20 11:52 AM, Geert Uytterhoeven wrote:
> As this is the 64-bit variant, I think this single move should be
> replaced by a double move:
> 
>        "mov  #0,%R1\n\t" \
>        "mov  #0,%S1\n\t" \
> 
> Same for the big endian version below.
> 
> Disclaimer: uncompiled, untested, no SH assembler expert.

Right, this makes sense. I'll send a new patch shortly.

As for the assembler review, I'll ask Yutaka Niibe who is a friend of mine
and one of the original SuperH wizards ;).

Adrian
John Paul Adrian Glaubitz May 31, 2020, 9:59 a.m. UTC | #3
On 5/31/20 11:54 AM, John Paul Adrian Glaubitz wrote:
> Hi Geert!
> 
> On 5/31/20 11:52 AM, Geert Uytterhoeven wrote:
>> As this is the 64-bit variant, I think this single move should be
>> replaced by a double move:
>>
>>        "mov  #0,%R1\n\t" \
>>        "mov  #0,%S1\n\t" \
>>
>> Same for the big endian version below.
>>
>> Disclaimer: uncompiled, untested, no SH assembler expert.
> 
> Right, this makes sense. I'll send a new patch shortly.

Hmm, this change is not the case for __put_user_asm() vs. __put_user_u64().

But I have to admit, I don't know what the part below "3:\n\t" is for.

Adrian
Geert Uytterhoeven May 31, 2020, 10:43 a.m. UTC | #4
Hi Adrian,

On Sun, May 31, 2020 at 11:59 AM John Paul Adrian Glaubitz
<glaubitz@physik.fu-berlin.de> wrote:
> On 5/31/20 11:54 AM, John Paul Adrian Glaubitz wrote:
> > On 5/31/20 11:52 AM, Geert Uytterhoeven wrote:
> >> As this is the 64-bit variant, I think this single move should be
> >> replaced by a double move:
> >>
> >>        "mov  #0,%R1\n\t" \
> >>        "mov  #0,%S1\n\t" \
> >>
> >> Same for the big endian version below.
> >>
> >> Disclaimer: uncompiled, untested, no SH assembler expert.
> >
> > Right, this makes sense. I'll send a new patch shortly.
>
> Hmm, this change is not the case for __put_user_asm() vs. __put_user_u64().
> But I have to admit, I don't know what the part below "3:\n\t" is for.

It's part of the exception handling, in case the passed (userspace) pointer
points to an inaccessible address, and triggers an exception.

For an invalid store, nothing is done, besides returning -EFAULT.
Hence there's no "mov #0, %1\n\t" in the put_user case.
For an invalid load, the data is replaced by zero, and -EFAULT is returned.

> +__asm__ __volatile__( \
> +       "1:\n\t" \
> +       "mov.l  %2,%R1\n\t" \
> +       "mov.l  %T2,%S1\n\t" \
> +       "2:\n" \

(reordering the two sections for easier explanation)

> +       ".section       __ex_table,\"a\"\n\t" \
> +       ".long  1b, 3b\n\t" \

In case an exception happens for the instruction at 1b, jump to 3b.

Note that the m68k version has two entries here: one for each half of
the 64-bit access[*].
I don't know if that is really needed (and thus SH needs it, too), or if
the exception code handles subsequent instructions automatically.

> +       ".section       .fixup,\"ax\"\n" \
> +       "3:\n\t" \
> +       "mov    #0, %1\n\t" \

Return zero instead of the data at the (invalid) address.

> +       "mov.l  4f, %0\n\t" \
> +       "jmp    @%0\n\t" \

Resume at 2b.
Remember: branch delay slot, so the instruction below is executed first!

> +       " mov   %3, %0\n\t" \

Set err to -EFAULT.

> +       ".balign        4\n" \
> +       "4:     .long   2b\n\t" \
> +       ".previous\n" \

> +       ".previous" \
> +       :"=&r" (err), "=&r" (x) \
> +       :"m" (__m(addr)), "i" (-EFAULT), "0" (err)); })

[*] arch/m68k/include/asm/uaccess_mm.h

                        "1:     "MOVES".l       (%2)+,%1\n"             \
                        "2:     "MOVES".l       (%2),%R1\n"             \

                        "       .section __ex_table,\"a\"\n"            \
                        "       .align  4\n"                            \
                        "       .long   1b,10b\n"                       \
                        "       .long   2b,10b\n"                       \

Gr{oetje,eeting}s,

                        Geert
John Paul Adrian Glaubitz May 31, 2020, 10:52 a.m. UTC | #5
Hi Geert!

Thanks a lot for the explanation!

On 5/31/20 12:43 PM, Geert Uytterhoeven wrote:
>> Hmm, this change is not the case for __put_user_asm() vs. __put_user_u64().
>> But I have to admit, I don't know what the part below "3:\n\t" is for.
> 
> It's part of the exception handling, in case the passed (userspace) pointer
> points to an inaccessible address, and triggers an exception.
> 
> For an invalid store, nothing is done, besides returning -EFAULT.
> Hence there's no "mov #0, %1\n\t" in the put_user case.

I have replaced it with two individual mov's now as suggested since I now
understand what's happening here.

> For an invalid load, the data is replaced by zero, and -EFAULT is returned.
> 
>> +__asm__ __volatile__( \
>> +       "1:\n\t" \
>> +       "mov.l  %2,%R1\n\t" \
>> +       "mov.l  %T2,%S1\n\t" \
>> +       "2:\n" \
> 
> (reordering the two sections for easier explanation)
> 
>> +       ".section       __ex_table,\"a\"\n\t" \
>> +       ".long  1b, 3b\n\t" \
> 
> In case an exception happens for the instruction at 1b, jump to 3b.
> 
> Note that the m68k version has two entries here: one for each half of
> the 64-bit access[*].
> I don't know if that is really needed (and thus SH needs it, too), or if
> the exception code handles subsequent instructions automatically.

Hmm. I assume this is something one of the SH maintainers or Yutaka Niibe
can answer.

>> +       ".section       .fixup,\"ax\"\n" \
>> +       "3:\n\t" \
>> +       "mov    #0, %1\n\t" \
> 
> Return zero instead of the data at the (invalid) address.

Makes sense.

>> +       "mov.l  4f, %0\n\t" \
>> +       "jmp    @%0\n\t" \
> 
> Resume at 2b.
> Remember: branch delay slot, so the instruction below is executed first!

I didn't even know that SH has delay slots.

>> +       " mov   %3, %0\n\t" \
> 
> Set err to -EFAULT.

Yes.

>> +       ".balign        4\n" \
>> +       "4:     .long   2b\n\t" \
>> +       ".previous\n" \
> 
>> +       ".previous" \
>> +       :"=&r" (err), "=&r" (x) \
>> +       :"m" (__m(addr)), "i" (-EFAULT), "0" (err)); })
> 
> [*] arch/m68k/include/asm/uaccess_mm.h
> 
>                         "1:     "MOVES".l       (%2)+,%1\n"             \
>                         "2:     "MOVES".l       (%2),%R1\n"             \
> 
>                         "       .section __ex_table,\"a\"\n"            \
>                         "       .align  4\n"                            \
>                         "       .long   1b,10b\n"                       \
>                         "       .long   2b,10b\n"                       \
> 

Hmm. I'll wait for more feedback whether need to do the same as on m68k here.

Adrian
Rich Felker June 1, 2020, 3:03 a.m. UTC | #6
On Sun, May 31, 2020 at 12:43:11PM +0200, Geert Uytterhoeven wrote:
> Hi Adrian,
> 
> On Sun, May 31, 2020 at 11:59 AM John Paul Adrian Glaubitz
> <glaubitz@physik.fu-berlin.de> wrote:
> > On 5/31/20 11:54 AM, John Paul Adrian Glaubitz wrote:
> > > On 5/31/20 11:52 AM, Geert Uytterhoeven wrote:
> > >> As this is the 64-bit variant, I think this single move should be
> > >> replaced by a double move:
> > >>
> > >>        "mov  #0,%R1\n\t" \
> > >>        "mov  #0,%S1\n\t" \
> > >>
> > >> Same for the big endian version below.
> > >>
> > >> Disclaimer: uncompiled, untested, no SH assembler expert.
> > >
> > > Right, this makes sense. I'll send a new patch shortly.
> >
> > Hmm, this change is not the case for __put_user_asm() vs. __put_user_u64().
> > But I have to admit, I don't know what the part below "3:\n\t" is for.
> 
> It's part of the exception handling, in case the passed (userspace) pointer
> points to an inaccessible address, and triggers an exception.
> 
> For an invalid store, nothing is done, besides returning -EFAULT.
> Hence there's no "mov #0, %1\n\t" in the put_user case.
> For an invalid load, the data is replaced by zero, and -EFAULT is returned.
> 
> > +__asm__ __volatile__( \
> > +       "1:\n\t" \
> > +       "mov.l  %2,%R1\n\t" \
> > +       "mov.l  %T2,%S1\n\t" \
> > +       "2:\n" \
> 
> (reordering the two sections for easier explanation)
> 
> > +       ".section       __ex_table,\"a\"\n\t" \
> > +       ".long  1b, 3b\n\t" \
> 
> In case an exception happens for the instruction at 1b, jump to 3b.
> 
> Note that the m68k version has two entries here: one for each half of
> the 64-bit access[*].
> I don't know if that is really needed (and thus SH needs it, too), or if
> the exception code handles subsequent instructions automatically.

Can I propose a different solution? For archs where there isn't
actually any 64-bit load or store instruction, does it make sense to
be writing asm just to do two 32-bit loads/stores, especially when
this code is not in a hot path?

What about just having the 64-bit versions call the corresponding
32-bit version twice? (Ideally this would even be arch-generic and
could replace the m68k asm.) It would return EFAULT if either of the
32-bit calls did.

Rich
Geert Uytterhoeven June 1, 2020, 9:02 a.m. UTC | #7
Hi Rich,

On Mon, Jun 1, 2020 at 5:03 AM Rich Felker <dalias@libc.org> wrote:
> On Sun, May 31, 2020 at 12:43:11PM +0200, Geert Uytterhoeven wrote:
> > On Sun, May 31, 2020 at 11:59 AM John Paul Adrian Glaubitz
> > <glaubitz@physik.fu-berlin.de> wrote:
> > > On 5/31/20 11:54 AM, John Paul Adrian Glaubitz wrote:
> > > > On 5/31/20 11:52 AM, Geert Uytterhoeven wrote:
> > > >> As this is the 64-bit variant, I think this single move should be
> > > >> replaced by a double move:
> > > >>
> > > >>        "mov  #0,%R1\n\t" \
> > > >>        "mov  #0,%S1\n\t" \
> > > >>
> > > >> Same for the big endian version below.
> > > >>
> > > >> Disclaimer: uncompiled, untested, no SH assembler expert.
> > > >
> > > > Right, this makes sense. I'll send a new patch shortly.
> > >
> > > Hmm, this change is not the case for __put_user_asm() vs. __put_user_u64().
> > > But I have to admit, I don't know what the part below "3:\n\t" is for.
> >
> > It's part of the exception handling, in case the passed (userspace) pointer
> > points to an inaccessible address, and triggers an exception.
> >
> > For an invalid store, nothing is done, besides returning -EFAULT.
> > Hence there's no "mov #0, %1\n\t" in the put_user case.
> > For an invalid load, the data is replaced by zero, and -EFAULT is returned.
> >
> > > +__asm__ __volatile__( \
> > > +       "1:\n\t" \
> > > +       "mov.l  %2,%R1\n\t" \
> > > +       "mov.l  %T2,%S1\n\t" \
> > > +       "2:\n" \
> >
> > (reordering the two sections for easier explanation)
> >
> > > +       ".section       __ex_table,\"a\"\n\t" \
> > > +       ".long  1b, 3b\n\t" \
> >
> > In case an exception happens for the instruction at 1b, jump to 3b.
> >
> > Note that the m68k version has two entries here: one for each half of
> > the 64-bit access[*].
> > I don't know if that is really needed (and thus SH needs it, too), or if
> > the exception code handles subsequent instructions automatically.
>
> Can I propose a different solution? For archs where there isn't
> actually any 64-bit load or store instruction, does it make sense to
> be writing asm just to do two 32-bit loads/stores, especially when
> this code is not in a hot path?
>
> What about just having the 64-bit versions call the corresponding
> 32-bit version twice? (Ideally this would even be arch-generic and
> could replace the m68k asm.) It would return EFAULT if either of the
> 32-bit calls did.

Yes, that's an option, too.

Gr{oetje,eeting}s,

                        Geert
John Paul Adrian Glaubitz June 1, 2020, 9:13 a.m. UTC | #8
Hello!

On 6/1/20 11:02 AM, Geert Uytterhoeven wrote:
>> Can I propose a different solution? For archs where there isn't
>> actually any 64-bit load or store instruction, does it make sense to
>> be writing asm just to do two 32-bit loads/stores, especially when
>> this code is not in a hot path?
>>
>> What about just having the 64-bit versions call the corresponding
>> 32-bit version twice? (Ideally this would even be arch-generic and
>> could replace the m68k asm.) It would return EFAULT if either of the
>> 32-bit calls did.
> 
> Yes, that's an option, too.

That's the solution that Michael Karcher suggested to me as an alternative
when I talked to him off-list.

While I understand that it works, I don't like the inconsistency and I also
don't see why we should opt for a potentially slower solution when we can
used the fastest one.

I'm also not sure how the exception handling would properly work when you
have two invocations of __get_user_asm().

My current approach is consistent with the existing code, so I think it's
the natural choice. I just need someone with more experience in SH assembler
than me that the solution is correct.

I have already pinged Niibe-san in private, he'll hopefully get back to me
within the next days.

Adrian
Rich Felker June 1, 2020, 4:57 p.m. UTC | #9
On Mon, Jun 01, 2020 at 11:13:26AM +0200, John Paul Adrian Glaubitz wrote:
> Hello!
> 
> On 6/1/20 11:02 AM, Geert Uytterhoeven wrote:
> >> Can I propose a different solution? For archs where there isn't
> >> actually any 64-bit load or store instruction, does it make sense to
> >> be writing asm just to do two 32-bit loads/stores, especially when
> >> this code is not in a hot path?
> >>
> >> What about just having the 64-bit versions call the corresponding
> >> 32-bit version twice? (Ideally this would even be arch-generic and
> >> could replace the m68k asm.) It would return EFAULT if either of the
> >> 32-bit calls did.
> > 
> > Yes, that's an option, too.
> 
> That's the solution that Michael Karcher suggested to me as an alternative
> when I talked to him off-list.
> 
> While I understand that it works, I don't like the inconsistency and I also
> don't see why we should opt for a potentially slower solution when we can
> used the fastest one.
> 
> I'm also not sure how the exception handling would properly work when you
> have two invocations of __get_user_asm().
> 
> My current approach is consistent with the existing code, so I think it's
> the natural choice. I just need someone with more experience in SH assembler
> than me that the solution is correct.
> 
> I have already pinged Niibe-san in private, he'll hopefully get back to me
> within the next days.

I don't have an objection to doing it the way you've proposed, but I
don't think there's any performance distinction or issue with the two
invocations.
Michael Karcher June 1, 2020, 8:26 p.m. UTC | #10
Rich Felker schrieb:
>> >> Can I propose a different solution? For archs where there isn't
>> >> actually any 64-bit load or store instruction, does it make sense to
>> >> be writing asm just to do two 32-bit loads/stores, especially when
>> >> this code is not in a hot path?
>> > Yes, that's an option, too.
>> That's the solution that Michael Karcher suggested to me as an
>> alternative when I talked to him off-list.

There is a functional argument agains using get_user_32 twice, which I
overlooked in my private reply to Adrian. If any of the loads fail, we do
not only want err to be set to -EFAULT (which will happen), but we also
want a 64-bit zero as result. If one 32-bit read faults, but the other one
works, we would get -EFAULT together with 32 valid data bits, and 32 zero
bits.

I continue to elaborate on my performance remark, ignoring this functional
difference (which is a good reason to not just use two 32-bit accesses,
much more so than the performance "issue").

> I don't have an objection to doing it the way you've proposed, but I
> don't think there's any performance distinction or issue with the two
> invocations.

Assuming we don't need two exception table entries (put_user_64 currently
uses only one, maybe it's wrong), using put_user_32 twice creates an extra
unneeded exception table entry, which will "bloat" the exception table.
That table is most likely accessed by a binary search algorithm, so the
performance loss is marginal, though. Also a bigger table size is
cache-unfriendly. (Again, this is likely marginal again, as binary search
is already extremely cache-unfriendly).

A similar argument can be made for the exception handler. Even if we need
two entries in the exception table, so the first paragraph does not apply,
the two entries in the exception table can share the same exception
handler (clear the whole 64-bit destination to zero, set -EFAULT, jump
past both load instructions), so that part of (admittedly cold) kernel
code can get some instructios shorter.


Regards,
  Michael Karcher
Rich Felker June 1, 2020, 8:50 p.m. UTC | #11
On Mon, Jun 01, 2020 at 10:26:09PM +0200, Michael Karcher wrote:
> Rich Felker schrieb:
> >> >> Can I propose a different solution? For archs where there isn't
> >> >> actually any 64-bit load or store instruction, does it make sense to
> >> >> be writing asm just to do two 32-bit loads/stores, especially when
> >> >> this code is not in a hot path?
> >> > Yes, that's an option, too.
> >> That's the solution that Michael Karcher suggested to me as an
> >> alternative when I talked to him off-list.
> 
> There is a functional argument agains using get_user_32 twice, which I
> overlooked in my private reply to Adrian. If any of the loads fail, we do
> not only want err to be set to -EFAULT (which will happen), but we also
> want a 64-bit zero as result. If one 32-bit read faults, but the other one
> works, we would get -EFAULT together with 32 valid data bits, and 32 zero
> bits.

Indeed, if you do it that way you want to check the return value and
set the value to 0 if either faults.

BTW I'm not sure what's supposed to happen on write if half faults
after the other half already succeeded... Either a C approach or an
asm approach has to consider that.

> > I don't have an objection to doing it the way you've proposed, but I
> > don't think there's any performance distinction or issue with the two
> > invocations.
> 
> Assuming we don't need two exception table entries (put_user_64 currently
> uses only one, maybe it's wrong), using put_user_32 twice creates an extra
> unneeded exception table entry, which will "bloat" the exception table.
> That table is most likely accessed by a binary search algorithm, so the
> performance loss is marginal, though. Also a bigger table size is
> cache-unfriendly. (Again, this is likely marginal again, as binary search
> is already extremely cache-unfriendly).
> 
> A similar argument can be made for the exception handler. Even if we need
> two entries in the exception table, so the first paragraph does not apply,
> the two entries in the exception table can share the same exception
> handler (clear the whole 64-bit destination to zero, set -EFAULT, jump
> past both load instructions), so that part of (admittedly cold) kernel
> code can get some instructios shorter.

Indeed. I don't think it's a significant difference but if kernel
folks do that's fine. In cases like this my personal preference is to
err on the side of less arch-specific asm.

Rich
Michael Karcher June 2, 2020, 10:19 a.m. UTC | #12
Rich Felker schrieb:
>> There is a functional argument agains using get_user_32 twice, which I
>> overlooked in my private reply to Adrian. If any of the loads fail, we
>> do not only want err to be set to -EFAULT (which will happen), but we
>> also want a 64-bit zero as result. If one 32-bit read faults, but the
>> other one works, we would get -EFAULT together with 32 valid data bits,
>> and 32 zero bits.
> Indeed, if you do it that way you want to check the return value and
> set the value to 0 if either faults.

Indeed. And if you do *that*, the performance of the hot path is affected
by the extra check. The performance discussion below only applied to the
cold path, so it seems perfectly valid to disregard it in favore of better
maintainability. On the other hand, checking the return value has a
possibly more serious performance and size (and if you like at the
I-Cache, size means performance) impact. When discussing size impact,
we should keep in mind that put_user for fixed size is supposed to be
inlined, so it's not a one-time cost, but a cost per call. On the other
hand, though, put_user for 64-bit values on SH4 seems to be nearly never
called, so the impact is still most likely negligible.

> BTW I'm not sure what's supposed to happen on write if half faults
> after the other half already succeeded... Either a C approach or an
> asm approach has to consider that.
That's an interesting question. From a kernel developer's point of view,
it seems like a valid view to say: "If userspace provides a bad pointer
where the kernel has to put the data, it's a problem of userspace. The
kernel only needs to tell userspace that the write is incomplete." This
is different to the defensive approach used when reading from user space
into kernel space. Here forcing the whole 64 bit to be zero makes the
kernel itself more robust by removing strange corner cases.

> Indeed. I don't think it's a significant difference but if kernel
> folks do that's fine. In cases like this my personal preference is to
> err on the side of less arch-specific asm.
This is a good idea to reduce maintainance cost. I agree it's up to the
kernel folks to decide whether:
 - Half-zeroed reads of partially faulted 64-bit-reads are acceptable
 - Cold error checks in the hot path to ensure full zeroing is acceptable
 - maintainance of arch-specific asm on many 32-bit architectures is
acceptable.

I don't want to endorse one of these three options, as I am out of the loop
regarding kernel development priorities and philosophy, I just intend to
point out the different options the kernel has to pick the one that fits
best.

Regards,
  Michael Karcher

Patch
diff mbox series

diff --git a/arch/sh/include/asm/uaccess_32.h b/arch/sh/include/asm/uaccess_32.h
index 624cf55acc27..8bc1cb50f8bf 100644
--- a/arch/sh/include/asm/uaccess_32.h
+++ b/arch/sh/include/asm/uaccess_32.h
@@ -26,6 +26,9 @@  do {								\
 	case 4:							\
 		__get_user_asm(x, ptr, retval, "l");		\
 		break;						\
+	case 8:							\
+		__get_user_u64(x, ptr, retval);			\
+		break;						\
 	default:						\
 		__get_user_unknown();				\
 		break;						\
@@ -66,6 +69,52 @@  do {							\
 
 extern void __get_user_unknown(void);
 
+#if defined(CONFIG_CPU_LITTLE_ENDIAN)
+#define __get_user_u64(x, addr, err) \
+({ \
+__asm__ __volatile__( \
+	"1:\n\t" \
+	"mov.l	%2,%R1\n\t" \
+	"mov.l	%T2,%S1\n\t" \
+	"2:\n" \
+	".section	.fixup,\"ax\"\n" \
+	"3:\n\t" \
+	"mov	#0, %1\n\t" \
+	"mov.l	4f, %0\n\t" \
+	"jmp	@%0\n\t" \
+	" mov	%3, %0\n\t" \
+	".balign	4\n" \
+	"4:	.long	2b\n\t" \
+	".previous\n" \
+	".section	__ex_table,\"a\"\n\t" \
+	".long	1b, 3b\n\t" \
+	".previous" \
+	:"=&r" (err), "=&r" (x) \
+	:"m" (__m(addr)), "i" (-EFAULT), "0" (err)); })
+#else
+#define __get_user_u64(x, addr, err) \
+({ \
+__asm__ __volatile__( \
+	"1:\n\t" \
+	"mov.l	%2,%S1\n\t" \
+	"mov.l	%T2,%R1\n\t" \
+	"2:\n" \
+	".section	.fixup,\"ax\"\n" \
+	"3:\n\t" \
+	"mov	#0, %1\n\t" \
+	"mov.l	4f, %0\n\t" \
+	"jmp	@%0\n\t" \
+	" mov	%3, %0\n\t" \
+	".balign	4\n" \
+	"4:	.long	2b\n\t" \
+	".previous\n" \
+	".section	__ex_table,\"a\"\n\t" \
+	".long	1b, 3b\n\t" \
+	".previous" \
+	:"=&r" (err), "=&r" (x) \
+	:"m" (__m(addr)), "i" (-EFAULT), "0" (err)); })
+#endif
+
 #define __put_user_size(x,ptr,size,retval)		\
 do {							\
 	retval = 0;					\