Message ID | 1352495853-9790-1-git-send-email-rob.clark@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, Nov 09, 2012 at 09:17:33PM +0000, Rob Clark wrote: > From: Rob Clark <rob@ti.com> > > A new atomic modeset/pageflip ioctl being developed in DRM requires > get_user() to work for 64bit types (in addition to just put_user()). > > Signed-off-by: Rob Clark <rob@ti.com> > --- > arch/arm/include/asm/uaccess.h | 25 ++++++++++++++++++++----- > arch/arm/lib/getuser.S | 17 ++++++++++++++++- > 2 files changed, 36 insertions(+), 6 deletions(-) > > diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h > index 7e1f760..2e3fdb2 100644 > --- a/arch/arm/include/asm/uaccess.h > +++ b/arch/arm/include/asm/uaccess.h > @@ -100,6 +100,7 @@ static inline void set_fs(mm_segment_t fs) > extern int __get_user_1(void *); > extern int __get_user_2(void *); > extern int __get_user_4(void *); > +extern int __get_user_8(void *); > > #define __GUP_CLOBBER_1 "lr", "cc" > #ifdef CONFIG_CPU_USE_DOMAINS > @@ -108,6 +109,7 @@ extern int __get_user_4(void *); > #define __GUP_CLOBBER_2 "lr", "cc" > #endif > #define __GUP_CLOBBER_4 "lr", "cc" > +#define __GUP_CLOBBER_8 "lr", "cc" > > #define __get_user_x(__r2,__p,__e,__l,__s) \ > __asm__ __volatile__ ( \ > @@ -122,22 +124,35 @@ extern int __get_user_4(void *); > ({ \ > unsigned long __limit = current_thread_info()->addr_limit - 1; \ > register const typeof(*(p)) __user *__p asm("r0") = (p);\ > - register unsigned long __r2 asm("r2"); \ > register unsigned long __l asm("r1") = __limit; \ > register int __e asm("r0"); \ > switch (sizeof(*(__p))) { \ > - case 1: \ > + case 1: { \ > + register unsigned long __r2 asm("r2"); \ > __get_user_x(__r2, __p, __e, __l, 1); \ > + x = (typeof(*(p))) __r2; \ > break; \ > - case 2: \ > + } \ > + case 2: { \ > + register unsigned long __r2 asm("r2"); \ > __get_user_x(__r2, __p, __e, __l, 2); \ > + x = (typeof(*(p))) __r2; \ > break; \ > - case 4: \ > + } \ > + case 4: { \ > + register unsigned long __r2 asm("r2"); \ > __get_user_x(__r2, __p, __e, __l, 4); \ > + x = (typeof(*(p))) __r2; \ > + break; \ > + } \ > + case 8: { \ > + register unsigned long long __r2 asm("r2"); \ Does this matter? For EABI, we'll pass in (r2, r3) and it's all handcrafted asm, so the compiler shouldn't care much. For OABI, I think you may have to do some more work to get the two words where you want them. > + __get_user_x(__r2, __p, __e, __l, 8); \ > + x = (typeof(*(p))) __r2; \ > break; \ > + } \ > default: __e = __get_user_bad(); break; \ > } \ > - x = (typeof(*(p))) __r2; \ > __e; \ > }) > > diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S > index 9b06bb4..d05285c 100644 > --- a/arch/arm/lib/getuser.S > +++ b/arch/arm/lib/getuser.S > @@ -18,7 +18,7 @@ > * Inputs: r0 contains the address > * r1 contains the address limit, which must be preserved > * Outputs: r0 is the error code > - * r2 contains the zero-extended value > + * r2, r3 contains the zero-extended value > * lr corrupted > * > * No other registers must be altered. (see <asm/uaccess.h> > @@ -66,6 +66,19 @@ ENTRY(__get_user_4) > mov pc, lr > ENDPROC(__get_user_4) > > +ENTRY(__get_user_8) > + check_uaccess r0, 4, r1, r2, __get_user_bad Shouldn't you be passing 8 here, so that we validate the correct range? > +#ifdef CONFIG_THUMB2_KERNEL > +5: TUSER(ldr) r2, [r0] > +6: TUSER(ldr) r3, [r0, #4] > +#else > +5: TUSER(ldr) r2, [r0], #4 > +6: TUSER(ldr) r3, [r0] > +#endif This doesn't work for EABI big-endian systems. Will
On Mon, Nov 12, 2012 at 4:46 AM, Will Deacon <will.deacon@arm.com> wrote: > On Fri, Nov 09, 2012 at 09:17:33PM +0000, Rob Clark wrote: >> From: Rob Clark <rob@ti.com> >> >> A new atomic modeset/pageflip ioctl being developed in DRM requires >> get_user() to work for 64bit types (in addition to just put_user()). >> >> Signed-off-by: Rob Clark <rob@ti.com> >> --- >> arch/arm/include/asm/uaccess.h | 25 ++++++++++++++++++++----- >> arch/arm/lib/getuser.S | 17 ++++++++++++++++- >> 2 files changed, 36 insertions(+), 6 deletions(-) >> >> diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h >> index 7e1f760..2e3fdb2 100644 >> --- a/arch/arm/include/asm/uaccess.h >> +++ b/arch/arm/include/asm/uaccess.h >> @@ -100,6 +100,7 @@ static inline void set_fs(mm_segment_t fs) >> extern int __get_user_1(void *); >> extern int __get_user_2(void *); >> extern int __get_user_4(void *); >> +extern int __get_user_8(void *); >> >> #define __GUP_CLOBBER_1 "lr", "cc" >> #ifdef CONFIG_CPU_USE_DOMAINS >> @@ -108,6 +109,7 @@ extern int __get_user_4(void *); >> #define __GUP_CLOBBER_2 "lr", "cc" >> #endif >> #define __GUP_CLOBBER_4 "lr", "cc" >> +#define __GUP_CLOBBER_8 "lr", "cc" >> >> #define __get_user_x(__r2,__p,__e,__l,__s) \ >> __asm__ __volatile__ ( \ >> @@ -122,22 +124,35 @@ extern int __get_user_4(void *); >> ({ \ >> unsigned long __limit = current_thread_info()->addr_limit - 1; \ >> register const typeof(*(p)) __user *__p asm("r0") = (p);\ >> - register unsigned long __r2 asm("r2"); \ >> register unsigned long __l asm("r1") = __limit; \ >> register int __e asm("r0"); \ >> switch (sizeof(*(__p))) { \ >> - case 1: \ >> + case 1: { \ >> + register unsigned long __r2 asm("r2"); \ >> __get_user_x(__r2, __p, __e, __l, 1); \ >> + x = (typeof(*(p))) __r2; \ >> break; \ >> - case 2: \ >> + } \ >> + case 2: { \ >> + register unsigned long __r2 asm("r2"); \ >> __get_user_x(__r2, __p, __e, __l, 2); \ >> + x = (typeof(*(p))) __r2; \ >> break; \ >> - case 4: \ >> + } \ >> + case 4: { \ >> + register unsigned long __r2 asm("r2"); \ >> __get_user_x(__r2, __p, __e, __l, 4); \ >> + x = (typeof(*(p))) __r2; \ >> + break; \ >> + } \ >> + case 8: { \ >> + register unsigned long long __r2 asm("r2"); \ > > Does this matter? For EABI, we'll pass in (r2, r3) and it's all handcrafted > asm, so the compiler shouldn't care much. For OABI, I think you may have to > do some more work to get the two words where you want them. Is the question whether the compiler is guaranteed to allocate r2 and r3 in all cases? I'm not quite sure, I confess to usually trying to avoid inline asm. But from looking at the disassembly (for little endian EABI build) it seemed to do the right thing. The only other idea I had was to explicitly declare two 'unsigned long's and then shift them into a 64bit x, although I'm open to suggestions if there is a better way. >> + __get_user_x(__r2, __p, __e, __l, 8); \ >> + x = (typeof(*(p))) __r2; \ >> break; \ >> + } \ >> default: __e = __get_user_bad(); break; \ >> } \ >> - x = (typeof(*(p))) __r2; \ >> __e; \ >> }) >> >> diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S >> index 9b06bb4..d05285c 100644 >> --- a/arch/arm/lib/getuser.S >> +++ b/arch/arm/lib/getuser.S >> @@ -18,7 +18,7 @@ >> * Inputs: r0 contains the address >> * r1 contains the address limit, which must be preserved >> * Outputs: r0 is the error code >> - * r2 contains the zero-extended value >> + * r2, r3 contains the zero-extended value >> * lr corrupted >> * >> * No other registers must be altered. (see <asm/uaccess.h> >> @@ -66,6 +66,19 @@ ENTRY(__get_user_4) >> mov pc, lr >> ENDPROC(__get_user_4) >> >> +ENTRY(__get_user_8) >> + check_uaccess r0, 4, r1, r2, __get_user_bad > > Shouldn't you be passing 8 here, so that we validate the correct range? yes, sorry, I'll fix that >> +#ifdef CONFIG_THUMB2_KERNEL >> +5: TUSER(ldr) r2, [r0] >> +6: TUSER(ldr) r3, [r0, #4] >> +#else >> +5: TUSER(ldr) r2, [r0], #4 >> +6: TUSER(ldr) r3, [r0] >> +#endif > > This doesn't work for EABI big-endian systems. Hmm, is that true? Wouldn't put_user() then have the same problem? I guess __ARMEB__ is the flag for big-endian? BR, -R > Will > > -- > To unsubscribe from this list: send the line "unsubscribe linux-omap" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Nov 12, 2012 at 01:46:57PM +0000, Rob Clark wrote: > On Mon, Nov 12, 2012 at 4:46 AM, Will Deacon <will.deacon@arm.com> wrote: > > On Fri, Nov 09, 2012 at 09:17:33PM +0000, Rob Clark wrote: > >> @@ -122,22 +124,35 @@ extern int __get_user_4(void *); > >> ({ \ > >> unsigned long __limit = current_thread_info()->addr_limit - 1; \ > >> register const typeof(*(p)) __user *__p asm("r0") = (p);\ > >> - register unsigned long __r2 asm("r2"); \ > >> register unsigned long __l asm("r1") = __limit; \ > >> register int __e asm("r0"); \ > >> switch (sizeof(*(__p))) { \ > >> - case 1: \ > >> + case 1: { \ > >> + register unsigned long __r2 asm("r2"); \ > >> __get_user_x(__r2, __p, __e, __l, 1); \ > >> + x = (typeof(*(p))) __r2; \ > >> break; \ > >> - case 2: \ > >> + } \ > >> + case 2: { \ > >> + register unsigned long __r2 asm("r2"); \ > >> __get_user_x(__r2, __p, __e, __l, 2); \ > >> + x = (typeof(*(p))) __r2; \ > >> break; \ > >> - case 4: \ > >> + } \ > >> + case 4: { \ > >> + register unsigned long __r2 asm("r2"); \ > >> __get_user_x(__r2, __p, __e, __l, 4); \ > >> + x = (typeof(*(p))) __r2; \ > >> + break; \ > >> + } \ > >> + case 8: { \ > >> + register unsigned long long __r2 asm("r2"); \ > > > > Does this matter? For EABI, we'll pass in (r2, r3) and it's all handcrafted > > asm, so the compiler shouldn't care much. For OABI, I think you may have to > > do some more work to get the two words where you want them. > > Is the question whether the compiler is guaranteed to allocate r2 and > r3 in all cases? I'm not quite sure, I confess to usually trying to > avoid inline asm. But from looking at the disassembly (for little > endian EABI build) it seemed to do the right thing. I can't recall how OABI represents 64-bit values and particularly whether this differs between little and big-endian, so I wondered whether you may have to do some marshalling when you assign x. However, a few quick experiments with GCC suggest that the register representation matches EABI in regards to word ordering (it just doesn't require an even base register), although it would be good to find this written down somewhere... > The only other idea I had was to explicitly declare two 'unsigned > long's and then shift them into a 64bit x, although I'm open to > suggestions if there is a better way. Can't you just use register unsigned long long for all cases? Even better, follow what put_user does and use typeof(*(p))? > >> diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S > >> index 9b06bb4..d05285c 100644 > >> --- a/arch/arm/lib/getuser.S > >> +++ b/arch/arm/lib/getuser.S > >> @@ -18,7 +18,7 @@ > >> * Inputs: r0 contains the address > >> * r1 contains the address limit, which must be preserved > >> * Outputs: r0 is the error code > >> - * r2 contains the zero-extended value > >> + * r2, r3 contains the zero-extended value > >> * lr corrupted > >> * > >> * No other registers must be altered. (see <asm/uaccess.h> > >> @@ -66,6 +66,19 @@ ENTRY(__get_user_4) > >> mov pc, lr > >> ENDPROC(__get_user_4) > >> > >> +ENTRY(__get_user_8) > >> + check_uaccess r0, 4, r1, r2, __get_user_bad > > > > Shouldn't you be passing 8 here, so that we validate the correct range? > > yes, sorry, I'll fix that > > >> +#ifdef CONFIG_THUMB2_KERNEL > >> +5: TUSER(ldr) r2, [r0] > >> +6: TUSER(ldr) r3, [r0, #4] > >> +#else > >> +5: TUSER(ldr) r2, [r0], #4 > >> +6: TUSER(ldr) r3, [r0] > >> +#endif > > > > This doesn't work for EABI big-endian systems. > > Hmm, is that true? Wouldn't put_user() then have the same problem? I dug up the PCS and it seems that we arrange the two halves of the doubleword to match the ldm/stm memory representation for EABI, so sorry for the confusion. > I guess __ARMEB__ is the flag for big-endian? That's the thing defined by the compiler, yes. Will
On Mon, Nov 12, 2012 at 8:38 AM, Will Deacon <will.deacon@arm.com> wrote: > On Mon, Nov 12, 2012 at 01:46:57PM +0000, Rob Clark wrote: >> On Mon, Nov 12, 2012 at 4:46 AM, Will Deacon <will.deacon@arm.com> wrote: >> > On Fri, Nov 09, 2012 at 09:17:33PM +0000, Rob Clark wrote: >> >> @@ -122,22 +124,35 @@ extern int __get_user_4(void *); >> >> ({ \ >> >> unsigned long __limit = current_thread_info()->addr_limit - 1; \ >> >> register const typeof(*(p)) __user *__p asm("r0") = (p);\ >> >> - register unsigned long __r2 asm("r2"); \ >> >> register unsigned long __l asm("r1") = __limit; \ >> >> register int __e asm("r0"); \ >> >> switch (sizeof(*(__p))) { \ >> >> - case 1: \ >> >> + case 1: { \ >> >> + register unsigned long __r2 asm("r2"); \ >> >> __get_user_x(__r2, __p, __e, __l, 1); \ >> >> + x = (typeof(*(p))) __r2; \ >> >> break; \ >> >> - case 2: \ >> >> + } \ >> >> + case 2: { \ >> >> + register unsigned long __r2 asm("r2"); \ >> >> __get_user_x(__r2, __p, __e, __l, 2); \ >> >> + x = (typeof(*(p))) __r2; \ >> >> break; \ >> >> - case 4: \ >> >> + } \ >> >> + case 4: { \ >> >> + register unsigned long __r2 asm("r2"); \ >> >> __get_user_x(__r2, __p, __e, __l, 4); \ >> >> + x = (typeof(*(p))) __r2; \ >> >> + break; \ >> >> + } \ >> >> + case 8: { \ >> >> + register unsigned long long __r2 asm("r2"); \ >> > >> > Does this matter? For EABI, we'll pass in (r2, r3) and it's all handcrafted >> > asm, so the compiler shouldn't care much. For OABI, I think you may have to >> > do some more work to get the two words where you want them. >> >> Is the question whether the compiler is guaranteed to allocate r2 and >> r3 in all cases? I'm not quite sure, I confess to usually trying to >> avoid inline asm. But from looking at the disassembly (for little >> endian EABI build) it seemed to do the right thing. > > I can't recall how OABI represents 64-bit values and particularly whether this > differs between little and big-endian, so I wondered whether you may have to > do some marshalling when you assign x. However, a few quick experiments with > GCC suggest that the register representation matches EABI in regards to word > ordering (it just doesn't require an even base register), although it would > be good to find this written down somewhere... yeah, I was kinda hoping that someone a bit closer to the compiler would speak up here :-) >> The only other idea I had was to explicitly declare two 'unsigned >> long's and then shift them into a 64bit x, although I'm open to >> suggestions if there is a better way. > > Can't you just use register unsigned long long for all cases? Even better, > follow what put_user does and use typeof(*(p))? typeof(*(p) was my first try but: register typeof(*(p)) __r2 asm("r2"); gives me the error: error: read-only variable ‘__r2’ used as ‘asm’ output I guess because 'const' ends up being part of the typeof *p? I suppose I could do typeof(x) instead BR, -R >> >> diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S >> >> index 9b06bb4..d05285c 100644 >> >> --- a/arch/arm/lib/getuser.S >> >> +++ b/arch/arm/lib/getuser.S >> >> @@ -18,7 +18,7 @@ >> >> * Inputs: r0 contains the address >> >> * r1 contains the address limit, which must be preserved >> >> * Outputs: r0 is the error code >> >> - * r2 contains the zero-extended value >> >> + * r2, r3 contains the zero-extended value >> >> * lr corrupted >> >> * >> >> * No other registers must be altered. (see <asm/uaccess.h> >> >> @@ -66,6 +66,19 @@ ENTRY(__get_user_4) >> >> mov pc, lr >> >> ENDPROC(__get_user_4) >> >> >> >> +ENTRY(__get_user_8) >> >> + check_uaccess r0, 4, r1, r2, __get_user_bad >> > >> > Shouldn't you be passing 8 here, so that we validate the correct range? >> >> yes, sorry, I'll fix that >> >> >> +#ifdef CONFIG_THUMB2_KERNEL >> >> +5: TUSER(ldr) r2, [r0] >> >> +6: TUSER(ldr) r3, [r0, #4] >> >> +#else >> >> +5: TUSER(ldr) r2, [r0], #4 >> >> +6: TUSER(ldr) r3, [r0] >> >> +#endif >> > >> > This doesn't work for EABI big-endian systems. >> >> Hmm, is that true? Wouldn't put_user() then have the same problem? > > I dug up the PCS and it seems that we arrange the two halves of the > doubleword to match the ldm/stm memory representation for EABI, so sorry > for the confusion. > >> I guess __ARMEB__ is the flag for big-endian? > > That's the thing defined by the compiler, yes. > > Will > > -- > To unsubscribe from this list: send the line "unsubscribe linux-omap" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Nov 09, 2012 at 03:17:33PM -0600, Rob Clark wrote: > From: Rob Clark <rob@ti.com> > > A new atomic modeset/pageflip ioctl being developed in DRM requires > get_user() to work for 64bit types (in addition to just put_user()). NAK. (I did write a better email explaining all the ins and outs of why this won't work and why 64-bit get_user isn't possible, but my editor crapped out and lost all that well written message; I don't fancy typing it all out again.) Nevertheless, int test_ptr(unsigned int **v, unsigned int **p) { return get_user(*v, p); } produces a warning, and you can't get away from that if you stick 64-bit support into get_user(). Sorry, 64-bit get_user() is a no-no.
On Mon, Nov 12, 2012 at 1:27 PM, Russell King - ARM Linux <linux@arm.linux.org.uk> wrote: > On Fri, Nov 09, 2012 at 03:17:33PM -0600, Rob Clark wrote: >> From: Rob Clark <rob@ti.com> >> >> A new atomic modeset/pageflip ioctl being developed in DRM requires >> get_user() to work for 64bit types (in addition to just put_user()). > > NAK. > > (I did write a better email explaining all the ins and outs of why this > won't work and why 64-bit get_user isn't possible, but my editor crapped > out and lost all that well written message; I don't fancy typing it all > out again.) > > Nevertheless, > int test_ptr(unsigned int **v, unsigned int **p) > { > return get_user(*v, p); > } > > produces a warning, and you can't get away from that if you stick 64-bit > support into get_user(). Actually, it seems like using 'register typeof(x) __r2 asm("r2");' does avoid that warning.. I don't know if that was the only argument against 64-bit get_user(). But it will at least be inconvenient that get_user() works for 64bit on x86 but not arm.. BR, -R > Sorry, 64-bit get_user() is a no-no. > > -- > To unsubscribe from this list: send the line "unsubscribe linux-omap" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Nov 12, 2012 at 01:58:32PM -0600, Rob Clark wrote: > On Mon, Nov 12, 2012 at 1:27 PM, Russell King - ARM Linux > <linux@arm.linux.org.uk> wrote: > > On Fri, Nov 09, 2012 at 03:17:33PM -0600, Rob Clark wrote: > >> From: Rob Clark <rob@ti.com> > >> > >> A new atomic modeset/pageflip ioctl being developed in DRM requires > >> get_user() to work for 64bit types (in addition to just put_user()). > > > > NAK. > > > > (I did write a better email explaining all the ins and outs of why this > > won't work and why 64-bit get_user isn't possible, but my editor crapped > > out and lost all that well written message; I don't fancy typing it all > > out again.) > > > > Nevertheless, > > int test_ptr(unsigned int **v, unsigned int **p) > > { > > return get_user(*v, p); > > } > > > > produces a warning, and you can't get away from that if you stick 64-bit > > support into get_user(). > > Actually, it seems like using 'register typeof(x) __r2 asm("r2");' > does avoid that warning.. That seems to pass the checks I've done on it so far, and seems rather obvious (there's been a number of people looking at this, none of whom have come up with that solution). Provided the final cast is kept (which is there to ensure proper typechecking), it seems like it might be a solution.
On Mon, Nov 12, 2012 at 5:08 PM, Russell King - ARM Linux <linux@arm.linux.org.uk> wrote: > On Mon, Nov 12, 2012 at 01:58:32PM -0600, Rob Clark wrote: >> On Mon, Nov 12, 2012 at 1:27 PM, Russell King - ARM Linux >> <linux@arm.linux.org.uk> wrote: >> > On Fri, Nov 09, 2012 at 03:17:33PM -0600, Rob Clark wrote: >> >> From: Rob Clark <rob@ti.com> >> >> >> >> A new atomic modeset/pageflip ioctl being developed in DRM requires >> >> get_user() to work for 64bit types (in addition to just put_user()). >> > >> > NAK. >> > >> > (I did write a better email explaining all the ins and outs of why this >> > won't work and why 64-bit get_user isn't possible, but my editor crapped >> > out and lost all that well written message; I don't fancy typing it all >> > out again.) >> > >> > Nevertheless, >> > int test_ptr(unsigned int **v, unsigned int **p) >> > { >> > return get_user(*v, p); >> > } >> > >> > produces a warning, and you can't get away from that if you stick 64-bit >> > support into get_user(). >> >> Actually, it seems like using 'register typeof(x) __r2 asm("r2");' >> does avoid that warning.. > > That seems to pass the checks I've done on it so far, and seems rather > obvious (there's been a number of people looking at this, none of whom > have come up with that solution). Provided the final cast is kept > (which is there to ensure proper typechecking), it seems like it might > be a solution. I'm sort of thinking maybe we want to change 'switch (sizeof(*(__p)))' with 'switch (sizeof(typeof(x)))' in case someone ignores the compiler warning when they try something like: uint32_t x; uint64_t *p = ...; get_user(x, p); that was my one concern about 'register typeof(x) __r2 ...', but I think just changing the switch condition is enough. But maybe good to have some eyes on in case there is something else I'm not thinking of. BR, -R > -- > To unsubscribe from this list: send the line "unsubscribe linux-omap" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Nov 12, 2012 at 05:33:41PM -0600, Rob Clark wrote: > I'm sort of thinking maybe we want to change 'switch (sizeof(*(__p)))' > with 'switch (sizeof(typeof(x)))' in case someone ignores the compiler > warning when they try something like: Definitely not. Ttype of access is controlled by the pointer, not by the size of what it's being assigned to. Switching that around is likely to break stuff hugely. Consider this: unsigned char __user *p; int val; get_user(val, p); If the pointer type is used to determine the access size, a char will be accessed. This is legal - because we end up assigning an unsigned character to an int. If the size of the destination was used, we'd access an int instead, which is larger than the pointer, and probably the wrong thing to do anyway. Think of get_user(a, b) as being a special accessor having the ultimate semantics of: "a = *b;" but done in a safe way with error checking. > uint32_t x; > uint64_t *p = ...; > get_user(x, p); > > that was my one concern about 'register typeof(x) __r2 ...', but I > think just changing the switch condition is enough. But maybe good to > have some eyes on in case there is something else I'm not thinking of. And what should happen in the above is exactly the same as what happens if you do: x = *p; with those types. For ARM, that would be a 64-bit access (if the compiler decides not to optimize away the upper 32-bit access) followed by a narrowing cast down to 32-bit. With get_user() of course, there's no option not to optimize it away. However, this _does_ reveal a bug with your approach. With sizeof(*p) being 8, and the type of __r2 being a 32-bit quantity, the compiler will choose the 64-bit accessor, which will corrupt r3 - and the compiler won't know that r3 has been corrupted. That's too unsafe. I just tried a variant of your approach, but got lots of warnings again: register unsigned long long __r2 asm("r2"); __get_user_x(__r2, __p, __e, 8, "lr"); x = (typeof(*(__p))) ((typeof(x))__r2); because typeof(x) in the test_ptr() case ends up being a pointer itself. So, back to the drawing board, and I think back to the original position of "we don't support this".
On Mon, Nov 12, 2012 at 5:53 PM, Russell King - ARM Linux <linux@arm.linux.org.uk> wrote: > On Mon, Nov 12, 2012 at 05:33:41PM -0600, Rob Clark wrote: >> I'm sort of thinking maybe we want to change 'switch (sizeof(*(__p)))' >> with 'switch (sizeof(typeof(x)))' in case someone ignores the compiler >> warning when they try something like: > > Definitely not. Ttype of access is controlled by the pointer, not by the > size of what it's being assigned to. Switching that around is likely to > break stuff hugely. Consider this: > > unsigned char __user *p; > int val; > > get_user(val, p); > > If the pointer type is used to determine the access size, a char will be > accessed. This is legal - because we end up assigning an unsigned character > to an int. > > If the size of the destination was used, we'd access an int instead, which > is larger than the pointer, and probably the wrong thing to do anyway. > > Think of get_user(a, b) as being a special accessor having the ultimate > semantics of: "a = *b;" but done in a safe way with error checking. > >> uint32_t x; >> uint64_t *p = ...; >> get_user(x, p); >> >> that was my one concern about 'register typeof(x) __r2 ...', but I >> think just changing the switch condition is enough. But maybe good to >> have some eyes on in case there is something else I'm not thinking of. > > And what should happen in the above is exactly the same as what happens > if you do: > > x = *p; > > with those types. For ARM, that would be a 64-bit access (if the > compiler decides not to optimize away the upper 32-bit access) followed > by a narrowing cast down to 32-bit. With get_user() of course, there's > no option not to optimize it away. > > However, this _does_ reveal a bug with your approach. With sizeof(*p) > being 8, and the type of __r2 being a 32-bit quantity, the compiler will > choose the 64-bit accessor, which will corrupt r3 - and the compiler > won't know that r3 has been corrupted. right, that is what I was worried about.. but what about something along the lines of: case 8: { \ if (sizeof(x) < 8) \ __get_user_x(__r2, __p, __e, __l, 4); \ else \ __get_user_x(__r2, __p, __e, __l, 8); \ break; \ } \ maybe we need a special variant of __get_user_8() instead to get the right 32bits on big vs little endian systems, but I think something roughly along these lines could work. Or maybe in sizeof(x)<8 case, we just __get_user_bad().. I'm not 100% sure on whether this is supposed to be treated as an error case at compile time or not. BR, -R > That's too unsafe. > > I just tried a variant of your approach, but got lots of warnings again: > register unsigned long long __r2 asm("r2"); > __get_user_x(__r2, __p, __e, 8, "lr"); > x = (typeof(*(__p))) ((typeof(x))__r2); > because typeof(x) in the test_ptr() case ends up being a pointer itself. > > So, back to the drawing board, and I think back to the original position > of "we don't support this". > -- > To unsubscribe from this list: send the line "unsubscribe linux-omap" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tuesday 13 November 2012, Rob Clark wrote: > right, that is what I was worried about.. but what about something > along the lines of: > > case 8: { \ > if (sizeof(x) < 8) \ > __get_user_x(__r2, __p, __e, __l, 4); \ > else \ > __get_user_x(__r2, __p, __e, __l, 8); \ > break; \ > } \ I guess that's still broken if x is 8 or 16 bits wide. > maybe we need a special variant of __get_user_8() instead to get the > right 32bits on big vs little endian systems, but I think something > roughly along these lines could work. > > Or maybe in sizeof(x)<8 case, we just __get_user_bad().. I'm not 100% > sure on whether this is supposed to be treated as an error case at > compile time or not. We know that nobody is using that at the moment, so we could define it to be a compile-time error. But I still think this is a pointless exercise, a number of people have concluded independently that it's not worth trying to come up with a solution, whether one exists or not. Why can't you just use copy_from_user() anyway? Arnd
On Mon, Nov 12, 2012 at 06:31:50PM -0600, Rob Clark wrote: > right, that is what I was worried about.. but what about something > along the lines of: > > case 8: { \ > if (sizeof(x) < 8) \ > __get_user_x(__r2, __p, __e, __l, 4); \ > else \ > __get_user_x(__r2, __p, __e, __l, 8); \ > break; \ > } \ > > maybe we need a special variant of __get_user_8() instead to get the > right 32bits on big vs little endian systems, but I think something > roughly along these lines could work. The problem with that is... big endian systems - where the 32-bit word we want is the second one, so you can't just reduce that down to a 32-bit access like that. You also need to add an offset to the pointer in the BE case (which can be done.) I'd suggest calling the 4-byte version in there __get_user_xb() and doing the 4-byte offset for BE inside that (or aliasing it to __get_user_x for LE systems).
On Tue, Nov 13, 2012 at 09:11:09AM +0000, Arnd Bergmann wrote: > On Tuesday 13 November 2012, Rob Clark wrote: > > right, that is what I was worried about.. but what about something > > along the lines of: > > > > case 8: { \ > > if (sizeof(x) < 8) \ > > __get_user_x(__r2, __p, __e, __l, 4); \ > > else \ > > __get_user_x(__r2, __p, __e, __l, 8); \ > > break; \ > > } \ > > I guess that's still broken if x is 8 or 16 bits wide. Actually, it isn't - because if x is 8 or 16 bits wide, and we load a 32-bit quantity, all that follows is a narrowing cast which is exactly what happens today. We don't have a problem with register allocation like we have in the 32-bit x vs 64-bit pointer target type, which is what the above code works around. > > maybe we need a special variant of __get_user_8() instead to get the > > right 32bits on big vs little endian systems, but I think something > > roughly along these lines could work. > > > > Or maybe in sizeof(x)<8 case, we just __get_user_bad().. I'm not 100% > > sure on whether this is supposed to be treated as an error case at > > compile time or not. > > We know that nobody is using that at the moment, so we could define > it to be a compile-time error. > > But I still think this is a pointless exercise, a number of people have > concluded independently that it's not worth trying to come up with a > solution, whether one exists or not. Why can't you just use copy_from_user() > anyway? You're missing something; that is one of the greatest powers of open source. The many eyes (and minds) effect. Someone out there probably has a solution to whatever problem, the trick is to find that person. :) I think we have a working solution for this for ARM. It won't be suitable for every arch, where they have 8-bit and 16-bit registers able to be allocated by the compiler, but for architectures where the minimum register size is 32-bit, what we have below should work. In other words, I don't think this will work for x86-32 where ax, al, ah as well as eax are still available. What I have currently in my test file, which appears to work correctly, is (bear in mind this is based upon an older version of get_user() which pre-dates Will's cleanups): #define __get_user_x(__r2,__p,__e,__s,__i...) \ __asm__ __volatile__ ( \ "bl __get_user_" #__s \ : "=&r" (__e), "=r" (__r2) \ : "0" (__p) \ : __i, "cc") #ifdef BIG_ENDIAN #define __get_user_xb(__r2,__p,__e,__s,__i...) \ __get_user_x(__r2,(uintptr_t)__p + 4,__s,__i) #else #define __get_user_xb __get_user_x #endif #define get_user(x,p) \ ({ \ register const typeof(*(p)) __user *__p asm("r0") = (p);\ register int __e asm("r0"); \ register typeof(x) __r2 asm("r2"); \ switch (sizeof(*(__p))) { \ case 1: \ __get_user_x(__r2, __p, __e, 1, "lr"); \ break; \ case 2: \ __get_user_x(__r2, __p, __e, 2, "r3", "lr"); \ break; \ case 4: \ __get_user_x(__r2, __p, __e, 4, "lr"); \ break; \ case 8: \ if (sizeof((x)) < 8) \ __get_user_xb(__r2, __p, __e, 4, "lr"); \ else \ __get_user_x(__r2, __p, __e, 8, "lr"); \ break; \ default: __e = __get_user_bad(); break; \ } \ x = (typeof(*(__p))) __r2; \ __e; \ }) Tested with 8, 16, 32, 64 ints, 32bit int with const p, pointers, const pointers, pointer to 64bit with 32bit target, pointer-to-const-pointer to non-const pointer (which should and does warn).
On Tuesday 13 November 2012, Russell King - ARM Linux wrote: > You're missing something; that is one of the greatest powers of open > source. The many eyes (and minds) effect. Someone out there probably > has a solution to whatever problem, the trick is to find that person. :) > > I think we have a working solution for this for ARM. It won't be suitable > for every arch, where they have 8-bit and 16-bit registers able to be > allocated by the compiler, but for architectures where the minimum register > size is 32-bit, what we have below should work. I don't mind at all adding the extension to ARM, and I think it's pretty cool that you guys actually found a working solution. The part that worries me is that we are making architecture independent code depend on a clever hack that may or may not be possible to implement on a given architecture, and that most architecture maintainers wouldn't know how to implement correctly even if it's possible. Arnd
On Thu, Nov 15, 2012 at 3:19 AM, Arnd Bergmann <arnd@arndb.de> wrote: > On Tuesday 13 November 2012, Russell King - ARM Linux wrote: >> You're missing something; that is one of the greatest powers of open >> source. The many eyes (and minds) effect. Someone out there probably >> has a solution to whatever problem, the trick is to find that person. :) >> >> I think we have a working solution for this for ARM. It won't be suitable >> for every arch, where they have 8-bit and 16-bit registers able to be >> allocated by the compiler, but for architectures where the minimum register >> size is 32-bit, what we have below should work. > > I don't mind at all adding the extension to ARM, and I think it's pretty > cool that you guys actually found a working solution. > > The part that worries me is that we are making architecture independent > code depend on a clever hack that may or may not be possible to implement > on a given architecture, and that most architecture maintainers wouldn't > know how to implement correctly even if it's possible. I could always send a 3rd version with a comment smashed on about why that works if you think this is a problem.. BR, -R > Arnd > -- > To unsubscribe from this list: send the line "unsubscribe linux-omap" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thursday 15 November 2012, Rob Clark wrote: > On Thu, Nov 15, 2012 at 3:19 AM, Arnd Bergmann <arnd@arndb.de> wrote: > > On Tuesday 13 November 2012, Russell King - ARM Linux wrote: > >> You're missing something; that is one of the greatest powers of open > >> source. The many eyes (and minds) effect. Someone out there probably > >> has a solution to whatever problem, the trick is to find that person. :) > >> > >> I think we have a working solution for this for ARM. It won't be suitable > >> for every arch, where they have 8-bit and 16-bit registers able to be > >> allocated by the compiler, but for architectures where the minimum register > >> size is 32-bit, what we have below should work. > > > > I don't mind at all adding the extension to ARM, and I think it's pretty > > cool that you guys actually found a working solution. > > > > The part that worries me is that we are making architecture independent > > code depend on a clever hack that may or may not be possible to implement > > on a given architecture, and that most architecture maintainers wouldn't > > know how to implement correctly even if it's possible. > > I could always send a 3rd version with a comment smashed on about why > that works if you think this is a problem.. Comments are always good, so I'd surely like to see those get added. As I said, I don't have any objections to the addition of your patch to the ARM code, which sounds useful to have. I still haven't heard a conclusive argument why we need to use get_user() rather than copy_from_user() in the DRM code. Is this about a fast path where you want to shave off a few cycles for each call, or does this simplify the code structure, or something else? Arnd
On Thu, Nov 15, 2012 at 7:39 AM, Arnd Bergmann <arnd@arndb.de> wrote: > On Thursday 15 November 2012, Rob Clark wrote: >> On Thu, Nov 15, 2012 at 3:19 AM, Arnd Bergmann <arnd@arndb.de> wrote: >> > On Tuesday 13 November 2012, Russell King - ARM Linux wrote: >> >> You're missing something; that is one of the greatest powers of open >> >> source. The many eyes (and minds) effect. Someone out there probably >> >> has a solution to whatever problem, the trick is to find that person. :) >> >> >> >> I think we have a working solution for this for ARM. It won't be suitable >> >> for every arch, where they have 8-bit and 16-bit registers able to be >> >> allocated by the compiler, but for architectures where the minimum register >> >> size is 32-bit, what we have below should work. >> > >> > I don't mind at all adding the extension to ARM, and I think it's pretty >> > cool that you guys actually found a working solution. >> > >> > The part that worries me is that we are making architecture independent >> > code depend on a clever hack that may or may not be possible to implement >> > on a given architecture, and that most architecture maintainers wouldn't >> > know how to implement correctly even if it's possible. >> >> I could always send a 3rd version with a comment smashed on about why >> that works if you think this is a problem.. > > Comments are always good, so I'd surely like to see those get added. > As I said, I don't have any objections to the addition of your patch to > the ARM code, which sounds useful to have. ok, I'll send a v3 w/ some additional comments > I still haven't heard a conclusive argument why we need to use get_user() > rather than copy_from_user() in the DRM code. Is this about a fast path > where you want to shave off a few cycles for each call, or does this > simplify the code structure, or something else? well, it is mostly because it seemed like a good idea to first try to solve the root issue, rather than having to fix things up in each driver when someone from x86-world introduces a 64b get_user().. There are some other arch's that don't have 64b get_user(), but I don't think any that have any DRM drivers. If 64b get_user() is really not intended to be supported by all archs, it is better to remove it from x86 and the other arch's that do currently support it, rather than making it possible to write drivers that are broken on some archs. BR, -R > Arnd > -- > To unsubscribe from this list: send the line "unsubscribe linux-omap" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thursday 15 November 2012, Rob Clark wrote: > > I still haven't heard a conclusive argument why we need to use get_user() > > rather than copy_from_user() in the DRM code. Is this about a fast path > > where you want to shave off a few cycles for each call, or does this > > simplify the code structure, or something else? > > well, it is mostly because it seemed like a good idea to first try to > solve the root issue, rather than having to fix things up in each > driver when someone from x86-world introduces a 64b get_user().. As pointed out by hpa earlier, x86-32 doesn't have a 64b get_user either. I don't think we have a lot of drivers that are used only on 64-bit x86 and on 32-bit ARM but not on 32-bit x86. > There are some other arch's that don't have 64b get_user(), but I > don't think any that have any DRM drivers. If 64b get_user() is > really not intended to be supported by all archs, it is better to > remove it from x86 and the other arch's that do currently support it, > rather than making it possible to write drivers that are broken on > some archs. The majority of architectures we support have PCI and should be able to build the regular (radeon, nouveau, MGA, VIA, ...) DRM drivers AFAICT. Arnd
On Thu, Nov 15, 2012 at 02:39:41PM +0000, Arnd Bergmann wrote: > On Thursday 15 November 2012, Rob Clark wrote: > > > I still haven't heard a conclusive argument why we need to use get_user() > > > rather than copy_from_user() in the DRM code. Is this about a fast path > > > where you want to shave off a few cycles for each call, or does this > > > simplify the code structure, or something else? > > > > well, it is mostly because it seemed like a good idea to first try to > > solve the root issue, rather than having to fix things up in each > > driver when someone from x86-world introduces a 64b get_user().. > > As pointed out by hpa earlier, x86-32 doesn't have a 64b get_user > either. I don't think we have a lot of drivers that are used only > on 64-bit x86 and on 32-bit ARM but not on 32-bit x86. Ouch. I didn't realize that x86-32 doesn't have it. All the systems where I've run the new code are 64bit so I never noticed the problem. I see there was a patch [1] posted a long time ago to implement 64bit get_user() on x86-32. I wonder what happened to it? [1] https://lkml.org/lkml/2004/4/20/96
On Mon, Nov 19, 2012 at 04:32:36PM +0200, Ville Syrjälä wrote: > On Thu, Nov 15, 2012 at 02:39:41PM +0000, Arnd Bergmann wrote: > > On Thursday 15 November 2012, Rob Clark wrote: > > > > I still haven't heard a conclusive argument why we need to use get_user() > > > > rather than copy_from_user() in the DRM code. Is this about a fast path > > > > where you want to shave off a few cycles for each call, or does this > > > > simplify the code structure, or something else? > > > > > > well, it is mostly because it seemed like a good idea to first try to > > > solve the root issue, rather than having to fix things up in each > > > driver when someone from x86-world introduces a 64b get_user().. > > > > As pointed out by hpa earlier, x86-32 doesn't have a 64b get_user > > either. I don't think we have a lot of drivers that are used only > > on 64-bit x86 and on 32-bit ARM but not on 32-bit x86. > > Ouch. I didn't realize that x86-32 doesn't have it. All the systems > where I've run the new code are 64bit so I never noticed the problem. > > I see there was a patch [1] posted a long time ago to implement 64bit > get_user() on x86-32. I wonder what happened to it? > > [1] https://lkml.org/lkml/2004/4/20/96 Wonderful lkml.org after four "Negotiating SSL connection..." messages gives me under elinks... Unable to retrieve https://lkml.org/lkml/2004/4/20/96: Error reading from socket and with wget: $ wget https://lkml.org/lkml/2004/4/20/96 --2012-11-19 14:47:16-- https://lkml.org/lkml/2004/4/20/96 Resolving lkml.org... 87.253.128.182 Connecting to lkml.org|87.253.128.182|:443... connected. HTTP request sent, awaiting response... No data received. Retrying. --2012-11-19 14:47:19-- (try: 2) https://lkml.org/lkml/2004/4/20/96 Connecting to lkml.org|87.253.128.182|:443... connected. HTTP request sent, awaiting response... No data received. Retrying. --2012-11-19 14:47:22-- (try: 3) https://lkml.org/lkml/2004/4/20/96 Connecting to lkml.org|87.253.128.182|:443... connected. HTTP request sent, awaiting response... No data received. Retrying. --2012-11-19 14:47:26-- (try: 4) https://lkml.org/lkml/2004/4/20/96 Connecting to lkml.org|87.253.128.182|:443... connected. HTTP request sent, awaiting response... No data received. Retrying. ... what a wonderful site... please choose another LKML archive, preferably one which works. Thanks.
On Mon, Nov 19, 2012 at 02:48:06PM +0000, Russell King - ARM Linux wrote: > On Mon, Nov 19, 2012 at 04:32:36PM +0200, Ville Syrjälä wrote: > > On Thu, Nov 15, 2012 at 02:39:41PM +0000, Arnd Bergmann wrote: > > > On Thursday 15 November 2012, Rob Clark wrote: > > > > > I still haven't heard a conclusive argument why we need to use get_user() > > > > > rather than copy_from_user() in the DRM code. Is this about a fast path > > > > > where you want to shave off a few cycles for each call, or does this > > > > > simplify the code structure, or something else? > > > > > > > > well, it is mostly because it seemed like a good idea to first try to > > > > solve the root issue, rather than having to fix things up in each > > > > driver when someone from x86-world introduces a 64b get_user().. > > > > > > As pointed out by hpa earlier, x86-32 doesn't have a 64b get_user > > > either. I don't think we have a lot of drivers that are used only > > > on 64-bit x86 and on 32-bit ARM but not on 32-bit x86. > > > > Ouch. I didn't realize that x86-32 doesn't have it. All the systems > > where I've run the new code are 64bit so I never noticed the problem. > > > > I see there was a patch [1] posted a long time ago to implement 64bit > > get_user() on x86-32. I wonder what happened to it? > > > > [1] https://lkml.org/lkml/2004/4/20/96 > > Wonderful lkml.org after four "Negotiating SSL connection..." messages > gives me under elinks... <snip> > what a wonderful site... please choose another LKML archive, preferably > one which works. Thanks. This one look like the same thing: http://article.gmane.org/gmane.linux.kernel/198823
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 7e1f760..2e3fdb2 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -100,6 +100,7 @@ static inline void set_fs(mm_segment_t fs) extern int __get_user_1(void *); extern int __get_user_2(void *); extern int __get_user_4(void *); +extern int __get_user_8(void *); #define __GUP_CLOBBER_1 "lr", "cc" #ifdef CONFIG_CPU_USE_DOMAINS @@ -108,6 +109,7 @@ extern int __get_user_4(void *); #define __GUP_CLOBBER_2 "lr", "cc" #endif #define __GUP_CLOBBER_4 "lr", "cc" +#define __GUP_CLOBBER_8 "lr", "cc" #define __get_user_x(__r2,__p,__e,__l,__s) \ __asm__ __volatile__ ( \ @@ -122,22 +124,35 @@ extern int __get_user_4(void *); ({ \ unsigned long __limit = current_thread_info()->addr_limit - 1; \ register const typeof(*(p)) __user *__p asm("r0") = (p);\ - register unsigned long __r2 asm("r2"); \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ - case 1: \ + case 1: { \ + register unsigned long __r2 asm("r2"); \ __get_user_x(__r2, __p, __e, __l, 1); \ + x = (typeof(*(p))) __r2; \ break; \ - case 2: \ + } \ + case 2: { \ + register unsigned long __r2 asm("r2"); \ __get_user_x(__r2, __p, __e, __l, 2); \ + x = (typeof(*(p))) __r2; \ break; \ - case 4: \ + } \ + case 4: { \ + register unsigned long __r2 asm("r2"); \ __get_user_x(__r2, __p, __e, __l, 4); \ + x = (typeof(*(p))) __r2; \ + break; \ + } \ + case 8: { \ + register unsigned long long __r2 asm("r2"); \ + __get_user_x(__r2, __p, __e, __l, 8); \ + x = (typeof(*(p))) __r2; \ break; \ + } \ default: __e = __get_user_bad(); break; \ } \ - x = (typeof(*(p))) __r2; \ __e; \ }) diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 9b06bb4..d05285c 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -18,7 +18,7 @@ * Inputs: r0 contains the address * r1 contains the address limit, which must be preserved * Outputs: r0 is the error code - * r2 contains the zero-extended value + * r2, r3 contains the zero-extended value * lr corrupted * * No other registers must be altered. (see <asm/uaccess.h> @@ -66,6 +66,19 @@ ENTRY(__get_user_4) mov pc, lr ENDPROC(__get_user_4) +ENTRY(__get_user_8) + check_uaccess r0, 4, r1, r2, __get_user_bad +#ifdef CONFIG_THUMB2_KERNEL +5: TUSER(ldr) r2, [r0] +6: TUSER(ldr) r3, [r0, #4] +#else +5: TUSER(ldr) r2, [r0], #4 +6: TUSER(ldr) r3, [r0] +#endif + mov r0, #0 + mov pc, lr +ENDPROC(__get_user_8) + __get_user_bad: mov r2, #0 mov r0, #-EFAULT @@ -77,4 +90,6 @@ ENDPROC(__get_user_bad) .long 2b, __get_user_bad .long 3b, __get_user_bad .long 4b, __get_user_bad + .long 5b, __get_user_bad + .long 6b, __get_user_bad .popsection