
[RFC,v4,12/13] x86: implementation for HARDENED_ATOMIC

Message ID 1478809488-18303-13-git-send-email-elena.reshetova@intel.com (mailing list archive)
State New, archived

Commit Message

Reshetova, Elena Nov. 10, 2016, 8:24 p.m. UTC
This adds x86-specific code in order to support the
HARDENED_ATOMIC feature. When an overflow is detected
in atomic_t or atomic_long_t types, the counter is
decremented back by one (to keep it at INT_MAX or
LONG_MAX) and the issue is reported using BUG().
The side effect is that the counter cannot wrap,
in either legitimate or non-legitimate cases.
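
For orientation, the emitted sequence (shown in full in the patch
below) reads as follows; the comments are annotations added here
for readability and are not part of the patch itself:

static __always_inline void atomic_add(int i, atomic_t *v)
{
	asm volatile(LOCK_PREFIX "addl %1,%0\n"	/* the ordinary atomic add */

#ifdef CONFIG_HARDENED_ATOMIC
		     "jno 0f\n"			/* no signed overflow: skip to 0: */
		     LOCK_PREFIX "subl %1,%0\n"	/* overflow: undo the add */
		     "int $4\n0:\n"		/* raise the overflow trap (#OF) */
		     _ASM_EXTABLE(0b, 0b)	/* exception table entry for the trap */
#endif

		     : "+m" (v->counter)
		     : "ir" (i));
}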

The copyright for the original PAX_REFCOUNT code:
  - all REFCOUNT code in general: PaX Team <pageexec@freemail.hu>
  - various false positive fixes: Mathias Krause <minipli@googlemail.com>

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: David Windsor <dwindsor@gmail.com>
---
 arch/x86/Kconfig                   |   1 +
 arch/x86/include/asm/atomic.h      | 315 +++++++++++++++++++++++++++++++++++--
 arch/x86/include/asm/atomic64_32.h | 227 +++++++++++++++++++++++++-
 arch/x86/include/asm/atomic64_64.h | 219 +++++++++++++++++++++++++-
 arch/x86/include/asm/bitops.h      |   8 +-
 arch/x86/include/asm/cmpxchg.h     |  39 +++++
 arch/x86/include/asm/local.h       | 149 +++++++++++++++++-
 arch/x86/include/asm/preempt.h     |   2 +-
 arch/x86/include/asm/rmwcc.h       |  82 ++++++++--
 arch/x86/include/asm/rwsem.h       |  50 ++++++
 arch/x86/kernel/traps.c            |   4 +
 arch/x86/lib/atomic64_386_32.S     | 135 ++++++++++++++++
 arch/x86/lib/atomic64_cx8_32.S     |  78 ++++++++-
 13 files changed, 1260 insertions(+), 49 deletions(-)

Comments

Peter Zijlstra Nov. 10, 2016, 8:40 p.m. UTC | #1
On Thu, Nov 10, 2016 at 10:24:47PM +0200, Elena Reshetova wrote:
>  static __always_inline void atomic_add(int i, atomic_t *v)
>  {
> +	asm volatile(LOCK_PREFIX "addl %1,%0\n"
> +
> +#ifdef CONFIG_HARDENED_ATOMIC
> +		     "jno 0f\n"
> +		     LOCK_PREFIX "subl %1,%0\n"
> +		     "int $4\n0:\n"
> +		     _ASM_EXTABLE(0b, 0b)


This is unreadable gunk.

> +#endif
> +
>  		     : "+m" (v->counter)
>  		     : "ir" (i));
>  }
Kees Cook Nov. 10, 2016, 9:04 p.m. UTC | #2
On Thu, Nov 10, 2016 at 12:40 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Thu, Nov 10, 2016 at 10:24:47PM +0200, Elena Reshetova wrote:
>>  static __always_inline void atomic_add(int i, atomic_t *v)
>>  {
>> +     asm volatile(LOCK_PREFIX "addl %1,%0\n"
>> +
>> +#ifdef CONFIG_HARDENED_ATOMIC
>> +                  "jno 0f\n"
>> +                  LOCK_PREFIX "subl %1,%0\n"
>> +                  "int $4\n0:\n"
>> +                  _ASM_EXTABLE(0b, 0b)
>
>
> This is unreadable gunk.
>
>> +#endif
>> +
>>                    : "+m" (v->counter)
>>                    : "ir" (i));
>>  }

How would you suggest it be made readable? Or rather, what don't you
like about it?

-Kees
Peter Zijlstra Nov. 10, 2016, 9:16 p.m. UTC | #3
On Thu, Nov 10, 2016 at 01:04:20PM -0800, Kees Cook wrote:
> On Thu, Nov 10, 2016 at 12:40 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> > On Thu, Nov 10, 2016 at 10:24:47PM +0200, Elena Reshetova wrote:
> >>  static __always_inline void atomic_add(int i, atomic_t *v)
> >>  {
> >> +     asm volatile(LOCK_PREFIX "addl %1,%0\n"
> >> +
> >> +#ifdef CONFIG_HARDENED_ATOMIC
> >> +                  "jno 0f\n"
> >> +                  LOCK_PREFIX "subl %1,%0\n"
> >> +                  "int $4\n0:\n"
> >> +                  _ASM_EXTABLE(0b, 0b)
> >
> >
> > This is unreadable gunk.
> >
> >> +#endif
> >> +
> >>                    : "+m" (v->counter)
> >>                    : "ir" (i));
> >>  }
> 
> How would you suggest it be made readable? Or rather, what don't you
> like about it?

Try and find the label the jno jumps to.. I had to try 3 times.

Also, I hate how #ifdef CONFIG_HARDENED_ATOMIC is sprinkled all over, it
makes a huge trainwreck of that file.

Ideally there'd be only a single #ifdef CONFIG_HARDENED_ATOMIC.

I'm also not sure about atomic*_wrap() as an interface, these functions
already have far too long names. We could simply overload the existing
functions and select based off the argument type.
Kees Cook Nov. 10, 2016, 9:32 p.m. UTC | #4
On Thu, Nov 10, 2016 at 1:16 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Thu, Nov 10, 2016 at 01:04:20PM -0800, Kees Cook wrote:
>> On Thu, Nov 10, 2016 at 12:40 PM, Peter Zijlstra <peterz@infradead.org> wrote:
>> > On Thu, Nov 10, 2016 at 10:24:47PM +0200, Elena Reshetova wrote:
>> >>  static __always_inline void atomic_add(int i, atomic_t *v)
>> >>  {
>> >> +     asm volatile(LOCK_PREFIX "addl %1,%0\n"
>> >> +
>> >> +#ifdef CONFIG_HARDENED_ATOMIC
>> >> +                  "jno 0f\n"
>> >> +                  LOCK_PREFIX "subl %1,%0\n"
>> >> +                  "int $4\n0:\n"
>> >> +                  _ASM_EXTABLE(0b, 0b)
>> >
>> >
>> > This is unreadable gunk.
>> >
>> >> +#endif
>> >> +
>> >>                    : "+m" (v->counter)
>> >>                    : "ir" (i));
>> >>  }
>>
>> How would you suggest it be made readable? Or rather, what don't you
>> like about it?
>
> Try and find the label the jno jumps to.. I had to try 3 times.
>
> Also, I hate how #ifdef CONFIG_HARDENED_ATOMIC is sprinkled all over, it
> makes a huge trainwreck of that file.

I guess the question was "prefer copy/pasting" vs "sprinkled ifdef". I
tend to opt for reducing copy/paste, but this is just a code
organization issue. Likely, then, would be to have two separate
implementations in different .c files and have the Makefile select the
desired version.

> Ideally there'd be only a single #ifdef CONFIG_HARDENED_ATOMIC.
>
> I'm also not sure about atomic*_wrap() as an interface, these functions
> already have far too long names. We could simply overload the existing
> functions and select based off the argument type.

There was a concern over catching type errors when building without
CONFIG_HARDENED_ATOMIC. Again, this is just a code organization issue
-- I think whatever people prefer is fine. In theory, 0-day will
quickly catch the corner cases, but I'd love it if the types couldn't
get mixed up.

Out of curiosity, how would the arg-type selection look? Are there
other examples of this already in the kernel?

-Kees
Peter Zijlstra Nov. 10, 2016, 9:33 p.m. UTC | #5
On Thu, Nov 10, 2016 at 10:24:47PM +0200, Elena Reshetova wrote:
> +static __always_inline int atomic_cmpxchg_wrap(atomic_wrap_t *v, int old, int new)
> +{
> +	return cmpxchg(&v->counter, old, new);
> +}

WTH does that even mean, a wrap for a cmpxchg.

I seriously detest this interface
Peter Zijlstra Nov. 10, 2016, 9:46 p.m. UTC | #6
On Thu, Nov 10, 2016 at 01:32:50PM -0800, Kees Cook wrote:
> > Also, I hate how #ifdef CONFIG_HARDENED_ATOMIC is sprinkled all over, it
> > makes a huge trainwreck of that file.
> 
> I guess the question was "prefer copy/pasting" vs "sprinkled ifdef". I
> tend to opt for reducing copy/paste, but this is just a code
> organization issue. Likely, then, would be to have two separate
> implementations in different .c files and have the Makefile select the
> desired version.

No, no .c files, this stuff wants to be inline, esp. the unannotated
versions, those typically are a single instruction.

And you don't need to copy/paste, just: #define atomic_wrap_t atomic_t
and use the 'normal' functions.
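
One way to read that suggestion, as a minimal sketch for the
!CONFIG_HARDENED_ATOMIC case (an assumption about the intent, not
code from the thread): the _wrap type and API simply alias the
normal ones, so only a single #ifdef is needed:

#ifndef CONFIG_HARDENED_ATOMIC
/* no hardening: the _wrap variants are just the normal ops */
typedef atomic_t atomic_wrap_t;
#define atomic_read_wrap(v)		atomic_read(v)
#define atomic_set_wrap(v, i)		atomic_set((v), (i))
#define atomic_add_wrap(i, v)		atomic_add((i), (v))
#define atomic_sub_wrap(i, v)		atomic_sub((i), (v))
#define atomic_inc_wrap(v)		atomic_inc(v)
#define atomic_dec_wrap(v)		atomic_dec(v)
#define atomic_add_return_wrap(i, v)	atomic_add_return((i), (v))
#define atomic_cmpxchg_wrap(v, o, n)	atomic_cmpxchg((v), (o), (n))
#endif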

> > Ideally there'd be only a single #ifdef CONFIG_HARDENED_ATOMIC.
> >
> > I'm also not sure about atomic*_wrap() as an interface, these functions
> > already have far too long names. We could simply overload the existing
> > functions and select based off the argument type.
> 
> There was a concern over catching type errors when building without
> CONFIG_HARDENED_ATOMIC. Again, this is just a code organization issue
> -- I think whatever people prefer is fine. In theory, 0-day will
> quickly catch the corner cases, but I'd love it if the types couldn't
> get mixed up.
> 
> Out of curiosity, how would the arg-type selection look? Are there
> other examples of this already in the kernel?

See static_branch_likely() / static_branch_unlikely().
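
As a minimal sketch of that pattern (an illustration only, modeled on
the static_branch_likely() trick of testing the argument's type; the
__atomic_add_protected() and __atomic_add_wrap() helper names are
hypothetical), type-based selection could look roughly like:

#define atomic_add(i, v)						\
({									\
	typeof(v) ____v = (v);						\
	if (__builtin_types_compatible_p(typeof(*____v), atomic_t))	\
		__atomic_add_protected((i), (atomic_t *)____v);		\
	else if (__builtin_types_compatible_p(typeof(*____v),		\
					      atomic_wrap_t))		\
		__atomic_add_wrap((i), (atomic_wrap_t *)____v);		\
	else								\
		__atomic_bad_type();	/* undefined: forces a link error */ \
})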
Peter Zijlstra Nov. 10, 2016, 10:50 p.m. UTC | #7
On Thu, Nov 10, 2016 at 09:40:46PM +0100, Peter Zijlstra wrote:
> On Thu, Nov 10, 2016 at 10:24:47PM +0200, Elena Reshetova wrote:
> >  static __always_inline void atomic_add(int i, atomic_t *v)
> >  {
> > +	asm volatile(LOCK_PREFIX "addl %1,%0\n"
> > +
> > +#ifdef CONFIG_HARDENED_ATOMIC
> > +		     "jno 0f\n"
> > +		     LOCK_PREFIX "subl %1,%0\n"
> > +		     "int $4\n0:\n"
> > +		     _ASM_EXTABLE(0b, 0b)
> 
> 
> This is unreadable gunk.

Worse, this is fundamentally racy and therefore not a proper atomic at
all.

The only way to do this is a cmpxchg loop and not issue the result on
overflow. Of course, that would completely suck performance wise, but
having a non atomic atomic blows more.

> > +#endif
> > +
> >  		     : "+m" (v->counter)
> >  		     : "ir" (i));
> >  }
Kees Cook Nov. 10, 2016, 11:07 p.m. UTC | #8
On Thu, Nov 10, 2016 at 2:50 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Thu, Nov 10, 2016 at 09:40:46PM +0100, Peter Zijlstra wrote:
>> On Thu, Nov 10, 2016 at 10:24:47PM +0200, Elena Reshetova wrote:
>> >  static __always_inline void atomic_add(int i, atomic_t *v)
>> >  {
>> > +   asm volatile(LOCK_PREFIX "addl %1,%0\n"
>> > +
>> > +#ifdef CONFIG_HARDENED_ATOMIC
>> > +                "jno 0f\n"
>> > +                LOCK_PREFIX "subl %1,%0\n"
>> > +                "int $4\n0:\n"
>> > +                _ASM_EXTABLE(0b, 0b)
>>
>>
>> This is unreadable gunk.
>
> Worse, this is fundamentally racy and therefore not a proper atomic at
> all.

The race was discussed earlier as well (and some of that should
already have been covered in the commit message), but basically this
is a race in the overflow protection, so it's not operationally a
problem. The process that caused the overflow still gets killed, and
if the system isn't already set up to panic on oops, this becomes a
resource leak instead of an exploitable condition.

> The only way to do this is a cmpxchg loop and not issue the result on
> overflow. Of course, that would completely suck performance wise, but
> having a non atomic atomic blows more.

There was some work to examine this performance impact, and it didn't
look terrible, but this race-on-overflow isn't viewed as a meaningful
risk in the refcount situation.

-Kees
Peter Zijlstra Nov. 10, 2016, 11:30 p.m. UTC | #9
On Thu, Nov 10, 2016 at 03:07:37PM -0800, Kees Cook wrote:
> On Thu, Nov 10, 2016 at 2:50 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> > On Thu, Nov 10, 2016 at 09:40:46PM +0100, Peter Zijlstra wrote:
> >> On Thu, Nov 10, 2016 at 10:24:47PM +0200, Elena Reshetova wrote:
> >> >  static __always_inline void atomic_add(int i, atomic_t *v)
> >> >  {
> >> > +   asm volatile(LOCK_PREFIX "addl %1,%0\n"
> >> > +
> >> > +#ifdef CONFIG_HARDENED_ATOMIC
> >> > +                "jno 0f\n"
> >> > +                LOCK_PREFIX "subl %1,%0\n"
> >> > +                "int $4\n0:\n"
> >> > +                _ASM_EXTABLE(0b, 0b)
> >>
> >>
> >> This is unreadable gunk.
> >
> > Worse, this is fundamentally racy and therefore not a proper atomic at
> > all.
> 
> The race was discussed earlier as well 

Which is useless, since nobody was listening etc..

> (and some of that should
> already have been covered in the commit message)

it does not.

> , but basically this
> is a race in the overflow protection, so it's not operationally a
> problem. The process that caused the overflow still gets killed, and
> if the system isn't already set up to panic on oops, this becomes a
> resource leak instead of an exploitable condition.

Now is this harmless? If you have two increments racing like:

	  inc
	  jno 1 // overflow

			  inc
			  jno 1 // !overflow

	  dec
	1:		1:

The second thread will still affect your wrap and not BUG.

> 
> > The only way to do this is a cmpxchg loop and not issue the result on
> > overflow. Of course, that would completely suck performance wise, but
> > having a non atomic atomic blows more.
> 
> There was some work to examine this performance impact, and it didn't
> look terrible, but this race-on-overflow isn't viewed as a meaningful
> risk in the refcount situation.

I have a benchmark somewhere, I can run numbers tomorrow, but it really
shows once you get a bit of contention going. Once you hit 4 nodes
contending on a variable it's completely out there IIRC.

The unconditional atomic ops really are loads faster than cmpxchg loops.
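
For reference, the cmpxchg-loop scheme under discussion would look
roughly like the sketch below (an illustration, not code from the
thread; the helper name is made up): keep retrying until the update
lands, and refuse to store a result that signed-overflows.

static __always_inline void atomic_add_checked(int i, atomic_t *v)
{
	int old, new, seen;

	old = atomic_read(v);
	for (;;) {
		new = old + i;	/* wraps; kernel builds with -fno-strict-overflow */
		/* signed overflow: old and i have the same sign, new differs */
		if (unlikely(((old ^ new) & ~(old ^ i)) < 0))
			BUG();	/* never publish the wrapped value */
		seen = atomic_cmpxchg(v, old, new);
		if (likely(seen == old))
			break;
		old = seen;	/* lost the race; retry against the new value */
	}
}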
Reshetova, Elena Nov. 11, 2016, 9:20 a.m. UTC | #10
>On Thu, Nov 10, 2016 at 10:24:47PM +0200, Elena Reshetova wrote:
> +static __always_inline int atomic_cmpxchg_wrap(atomic_wrap_t *v, int old, int new)
> +{
> +	return cmpxchg(&v->counter, old, new);
> +}

>WTH does that even mean, a wrap for a cmpxchg.

We need to provide *_wrap() alternatives to all basic atomic functions in this patchset,
because any variable in the kernel code (now or in the future) that should be opted out of
protection has to use a different type, and as a result different functions, even if the
function behind it doesn't do anything differently. People were very concerned about type
checks and possible confusion, so we spent time adjusting this to make it hard to make a
mistake.

>I seriously detest this interface

Then we need to work further on finding a better one, if it isn't acceptable. Everyone's end goal is
to get protections in place. We knew it was going to be a long and bumpy road to get it done. At least
this shows how it can be done. Also remember that Grsecurity/PaX-protected kernels have been running
around for a while with similar changes, and their users don't seem to have all their drivers panicking
and machines collapsing.
Reshetova, Elena Nov. 11, 2016, 9:32 a.m. UTC | #11
>On Thu, Nov 10, 2016 at 03:07:37PM -0800, Kees Cook wrote:
> On Thu, Nov 10, 2016 at 2:50 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> > On Thu, Nov 10, 2016 at 09:40:46PM +0100, Peter Zijlstra wrote:
> >> On Thu, Nov 10, 2016 at 10:24:47PM +0200, Elena Reshetova wrote:
> >> >  static __always_inline void atomic_add(int i, atomic_t *v)  {
> >> > +   asm volatile(LOCK_PREFIX "addl %1,%0\n"
> >> > +
> >> > +#ifdef CONFIG_HARDENED_ATOMIC
> >> > +                "jno 0f\n"
> >> > +                LOCK_PREFIX "subl %1,%0\n"
> >> > +                "int $4\n0:\n"
> >> > +                _ASM_EXTABLE(0b, 0b)
> >>
> >>
> >> This is unreadable gunk.
> >
> > Worse, this is fundamentally racy and therefore not a proper atomic 
> > at all.
> 
> The race was discussed earlier as well

>Which is useless, since nobody was listening etc..

> (and some of that should
> already have been covered in the commit message)

>it does not.

It is covered in the documentation file (Documentation/security/
hardened-atomic.txt) included in the first patch.
There are just so many details that including them all in commit
messages would produce pages-long commit messages, so some
things are covered only in the documentation.

> , but basically this
> is a race in the overflow protection, so it's not operationally a 
> problem. The process that caused the overflow still gets killed, and 
> if the system isn't already set up to panic on oops, this becomes a 
> resource leak instead of an exploitable condition.

>Now is this harmless? If you have two increments racing like:

	  inc
	  jno 1 // overflow

			  inc
			  jno 1 // !overflow

	  dec
	1:		1:

>The second thread will still affect your wrap and not BUG.

> 
> > The only way to do this is a cmpxchg loop and not issue the result 
> > on overflow. Of course, that would completely suck performance wise, 
> > but having a non atomic atomic blows more.
> 
> There was some work to examine this performance impact, and it didn't 
> look terrible, but this race-on-overflow isn't viewed as a meaningful 
> risk in the refcount situation.

>I have a benchmark somewhere, I can run numbers tomorrow, but it really shows once you get a bit of contention going. Once you hit 4 nodes contending on a variable it's completely out there IIRC.
This would help to get more numbers on this, thank you. 

>The unconditional atomic ops really are loads faster than cmpxchg loops.

Yes, and this is what we saw when doing performance measurements. So, as a result, we went with the faster method, which we believed still has a low risk of hitting the race.
If your numbers prove otherwise, then we have to reconsider.
Peter Zijlstra Nov. 11, 2016, 10:29 a.m. UTC | #12
On Fri, Nov 11, 2016 at 09:32:45AM +0000, Reshetova, Elena wrote:
> It is covered in the documentation file (Documentation/security/
> hardened-atomic.txt) included in the first patch.
> There are just so many details that including them all in commit
> messages will produce pages long commit messages, so some
> things are moved to the documentation only. 

No, never skimp on Changelogs. Nobody reads documentation.

Also, this really should also have a very explicit code comment,
non-atomic constructs in atomic.h are 'surprising' at the very least.

> >Now is this harmless? If you have two increments racing like:
> 
> 	  inc
> 	  jno 1 // overflow
> 
> 			  inc
> 			  jno 1 // !overflow
> 
> 	  dec
> 	1:		1:
> 
> >The second thread will still affect your wrap and not BUG.

This still wants an answer, because attackers never exploit races?

> >I have a benchmark somewhere, I can run numbers tomorrow, but it
> >really shows once you get a bit of contention going. Once you hit 4
> >nodes contending on a variable its completely out there IIRC.

> This would help to get more numbers on this, thank you. 

		LOCK addl		LOCK cmpxchg-addl

1-node		1: 22.038250		1: 41.572270
		2: 174.019700           2: 198.965635
		3: 185.852060           3: 274.293927
		4: 389.169783           4: 266.738485
		6: 347.827897           6: 454.785715
		8: 369.649510           8: 463.125426


2-nodes		2: 428.448130		2: 1422.221850
		4: 616.203497           4: 1166.427205
		6: 855.639025           6: 1424.131080
		8: 1083.613291          8: 1402.484560

4-nodes		 4: 1180.591315		 4: 1830.301125
		 8: 1480.023056          8: 2043.418720
		16: 2602.128429         16: 2611.188079


Results are in cycles:u, average of 100000 loops.

As measured on a 4 socket IVB-EX (E7-4890 v2).

> >The unconditional atomic ops really are loads faster than cmpxchg
> >loops.
> 
> Yes, and this is what we saw when doing performance measurements. So,
> as a result we went with a faster method, which we believed still has
> a low risk of getting into race.  If you numbers prove otherwise, then
> we have to reconsider. 

It's fundamentally a question of semantics though. These are _atomic_
ops, they really should be, well, atomic. No exceptions.

If you want to play funny games, don't call them atomic.
Kees Cook Nov. 11, 2016, 6 p.m. UTC | #13
On Thu, Nov 10, 2016 at 3:30 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Thu, Nov 10, 2016 at 03:07:37PM -0800, Kees Cook wrote:
>> , but basically this
>> is a race in the overflow protection, so it's not operationally a
>> problem. The process that caused the overflow still gets killed, and
>> if the system isn't already set up to panic on oops, this becomes a
>> resource leak instead of an exploitable condition.
>
> Now is this harmless? If you have two increments racing like:

Adding some annotation for values and the BUG:

>
    INT_MAX
>           inc
    INT_MIN
>           jno 1 // overflow
>
>                           inc
    INT_MIN+1
>                           jno 1 // !overflow
>
>           dec
    INT_MIN
>         1:              1:
    BUG
>
> The second thread will still affect your wrap and not BUG.

Agreed: the first thread will BUG and the second thread is still halfway to 0.

On systems that panic on BUG, things are protected. For the rest of
the systems, an alternative to "dec" on overflow is to sub (more than)
NR_CPUS, to keep the saturation below the overflow level. This means
that it is still detected (BUG) by at least 1 thread, and cannot reach
0 (to trigger the flaw) on all other threads, even if they all lose
the race.
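
A rough sketch of that saturating back-off, shown for atomic_inc()
(not from the patch; the back-off amount of 2 * NR_CPUS is an
assumption for illustration):

static __always_inline void atomic_inc(atomic_t *v)
{
	asm volatile(LOCK_PREFIX "incl %0\n"
		     "jno 0f\n"
		     /* on overflow, back off by more than the number of
		      * CPUs that could be racing here, so the counter
		      * stays saturated below the wrap point even if every
		      * other CPU loses the same race */
		     LOCK_PREFIX "subl %1,%0\n"
		     "int $4\n0:\n"
		     _ASM_EXTABLE(0b, 0b)
		     : "+m" (v->counter)
		     : "ir" (2 * NR_CPUS));
}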

Some further details on examining the race are here:

https://bugs.chromium.org/p/project-zero/issues/detail?id=856#c2

To me, this seems better than taking the cmpxchg performance hit.

-Kees
Peter Zijlstra Nov. 11, 2016, 8:19 p.m. UTC | #14
On Fri, Nov 11, 2016 at 10:00:27AM -0800, Kees Cook wrote:
> Agreed: the first thread will BUG and the second thread is still halfway to 0.
> 
> On systems that panic on BUG, things are protected. For the rest of
> the systems, an alternative to "dec" on overflow is to sub (more than)
> NR_CPUS, to keep the saturation below the overflow level. This means
> that it is still detected (BUG) by at least 1 thread, and cannot reach
> 0 (to trigger the flaw) on all other threads, even if they all lose
> the race.

NR_CPUS has nothing to do with anything here. If this is a vcpu that got
scheduled out in between the inc and dec there can be an arbitrary
amount of other crap happening.

> To me, this seems better than taking the cmpxchg performance hit.

To me that shows you shouldn't be allowed near atomic_t. You cannot have
a non-atomic atomic op. That just doesn't happen.

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bada636..1a68790 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -80,6 +80,7 @@  config X86
 	select HAVE_AOUT			if X86_32
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_HARDENED_USERCOPY
+	select HAVE_ARCH_HARDENED_ATOMIC
 	select HAVE_ARCH_HUGE_VMAP		if X86_64 || X86_PAE
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KASAN			if X86_64 && SPARSEMEM_VMEMMAP
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 14635c5..97d9156 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -27,6 +27,18 @@  static __always_inline int atomic_read(const atomic_t *v)
 }
 
 /**
+ * atomic_read_wrap - read atomic variable
+ * @v: pointer of type atomic_wrap_t
+ *
+ * Atomically reads the value of @v.
+ */
+static __always_inline int atomic_read_wrap(const atomic_wrap_t *v)
+{
+	return ACCESS_ONCE((v)->counter);
+}
+#define atomic_read_wrap atomic_read_wrap
+
+/**
  * atomic_set - set atomic variable
  * @v: pointer of type atomic_t
  * @i: required value
@@ -39,6 +51,19 @@  static __always_inline void atomic_set(atomic_t *v, int i)
 }
 
 /**
+ * atomic_set_wrap - set atomic variable
+ * @v: pointer of type atomic_wrap_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static __always_inline void atomic_set_wrap(atomic_wrap_t *v, int i)
+{
+	v->counter = i;
+}
+#define atomic_set_wrap atomic_set_wrap
+
+/**
  * atomic_add - add integer to atomic variable
  * @i: integer value to add
  * @v: pointer of type atomic_t
@@ -47,12 +72,35 @@  static __always_inline void atomic_set(atomic_t *v, int i)
  */
 static __always_inline void atomic_add(int i, atomic_t *v)
 {
-	asm volatile(LOCK_PREFIX "addl %1,%0"
+	asm volatile(LOCK_PREFIX "addl %1,%0\n"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     LOCK_PREFIX "subl %1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     : "+m" (v->counter)
 		     : "ir" (i));
 }
 
 /**
+ * atomic_add_wrap - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_wrap_t
+ *
+ * Atomically adds @i to @v.
+ */
+static __always_inline void atomic_add_wrap(int i, atomic_wrap_t *v)
+{
+	asm volatile(LOCK_PREFIX "addl %1,%0\n"
+		     : "+m" (v->counter)
+		     : "ir" (i));
+}
+#define atomic_add_wrap atomic_add_wrap
+
+/**
  * atomic_sub - subtract integer from atomic variable
  * @i: integer value to subtract
  * @v: pointer of type atomic_t
@@ -61,12 +109,35 @@  static __always_inline void atomic_add(int i, atomic_t *v)
  */
 static __always_inline void atomic_sub(int i, atomic_t *v)
 {
-	asm volatile(LOCK_PREFIX "subl %1,%0"
+	asm volatile(LOCK_PREFIX "subl %1,%0\n"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     LOCK_PREFIX "addl %1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     : "+m" (v->counter)
 		     : "ir" (i));
 }
 
 /**
+ * atomic_sub_wrap - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_wrap_t
+ *
+ * Atomically subtracts @i from @v.
+ */
+static __always_inline void atomic_sub_wrap(int i, atomic_wrap_t *v)
+{
+	asm volatile(LOCK_PREFIX "subl %1,%0\n"
+		     : "+m" (v->counter)
+		     : "ir" (i));
+}
+#define atomic_sub_wrap atomic_sub_wrap
+
+/**
  * atomic_sub_and_test - subtract value from variable and test result
  * @i: integer value to subtract
  * @v: pointer of type atomic_t
@@ -77,10 +148,25 @@  static __always_inline void atomic_sub(int i, atomic_t *v)
  */
 static __always_inline bool atomic_sub_and_test(int i, atomic_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", LOCK_PREFIX "addl", v->counter, "er", i, "%0", e);
 }
 
 /**
+ * atomic_sub_and_test_wrap - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_wrap_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static __always_inline bool atomic_sub_and_test_wrap(int i, atomic_wrap_t *v)
+{
+	GEN_BINARY_RMWcc_wrap(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
+}
+#define atomic_sub_and_test_wrap atomic_sub_and_test_wrap
+
+/**
  * atomic_inc - increment atomic variable
  * @v: pointer of type atomic_t
  *
@@ -88,9 +174,30 @@  static __always_inline bool atomic_sub_and_test(int i, atomic_t *v)
  */
 static __always_inline void atomic_inc(atomic_t *v)
 {
-	asm volatile(LOCK_PREFIX "incl %0"
+	asm volatile(LOCK_PREFIX "incl %0\n"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     LOCK_PREFIX "decl %0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "+m" (v->counter));
+}
+
+/**
+ * atomic_inc_wrap - increment atomic variable
+ * @v: pointer of type atomic_wrap_t
+ *
+ * Atomically increments @v by 1.
+ */
+static __always_inline void atomic_inc_wrap(atomic_wrap_t *v)
+{
+	asm volatile(LOCK_PREFIX "incl %0\n"
 		     : "+m" (v->counter));
 }
+#define atomic_inc_wrap atomic_inc_wrap
 
 /**
  * atomic_dec - decrement atomic variable
@@ -100,11 +207,32 @@  static __always_inline void atomic_inc(atomic_t *v)
  */
 static __always_inline void atomic_dec(atomic_t *v)
 {
-	asm volatile(LOCK_PREFIX "decl %0"
+	asm volatile(LOCK_PREFIX "decl %0\n"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     LOCK_PREFIX "incl %0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     : "+m" (v->counter));
 }
 
 /**
+ * atomic_dec_wrap - decrement atomic variable
+ * @v: pointer of type atomic_wrap_t
+ *
+ * Atomically decrements @v by 1.
+ */
+static __always_inline void atomic_dec_wrap(atomic_wrap_t *v)
+{
+	asm volatile(LOCK_PREFIX "decl %0\n"
+		     : "+m" (v->counter));
+}
+#define atomic_dec_wrap atomic_dec_wrap
+
+/**
  * atomic_dec_and_test - decrement and test
  * @v: pointer of type atomic_t
  *
@@ -114,9 +242,15 @@  static __always_inline void atomic_dec(atomic_t *v)
  */
 static __always_inline bool atomic_dec_and_test(atomic_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", LOCK_PREFIX "incl", v->counter, "%0", e);
 }
 
+static __always_inline bool atomic_dec_and_test_wrap(atomic_wrap_t *v)
+{
+	GEN_UNARY_RMWcc_wrap(LOCK_PREFIX "decl", v->counter, "%0", e);
+}
+#define atomic_dec_and_test_wrap atomic_dec_and_test_wrap
+
 /**
  * atomic_inc_and_test - increment and test
  * @v: pointer of type atomic_t
@@ -127,10 +261,24 @@  static __always_inline bool atomic_dec_and_test(atomic_t *v)
  */
 static __always_inline bool atomic_inc_and_test(atomic_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", LOCK_PREFIX "decl", v->counter, "%0", e);
 }
 
 /**
+ * atomic_inc_and_test_wrap - increment and test
+ * @v: pointer of type atomic_wrap_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static __always_inline int atomic_inc_and_test_wrap(atomic_wrap_t *v)
+{
+	GEN_UNARY_RMWcc_wrap(LOCK_PREFIX "incl", v->counter, "%0", e);
+}
+#define atomic_inc_and_test_wrap atomic_inc_and_test_wrap
+
+/**
  * atomic_add_negative - add and test if negative
  * @i: integer value to add
  * @v: pointer of type atomic_t
@@ -141,9 +289,15 @@  static __always_inline bool atomic_inc_and_test(atomic_t *v)
  */
 static __always_inline bool atomic_add_negative(int i, atomic_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", LOCK_PREFIX "subl", v->counter, "er", i, "%0", s);
 }
 
+static __always_inline bool atomic_add_negative_wrap(int i, atomic_wrap_t *v)
+{
+	GEN_BINARY_RMWcc_wrap(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
+}
+#define atomic_add_negative_wrap atomic_add_negative_wrap
+
 /**
  * atomic_add_return - add integer and return
  * @i: integer value to add
@@ -153,8 +307,21 @@  static __always_inline bool atomic_add_negative(int i, atomic_t *v)
  */
 static __always_inline int atomic_add_return(int i, atomic_t *v)
 {
+	return i + xadd_check_overflow(&v->counter, i);
+}
+
+/**
+ * atomic_add_return_wrap - add integer and return
+ * @i: integer value to add
+ * @v: pointer of type atomic_wrap_t
+ *
+ * Atomically adds @i to @v and returns @i + @v
+ */
+static __always_inline int atomic_add_return_wrap(int i, atomic_wrap_t *v)
+{
 	return i + xadd(&v->counter, i);
 }
+#define atomic_add_return_wrap atomic_add_return_wrap
 
 /**
  * atomic_sub_return - subtract integer and return
@@ -168,29 +335,70 @@  static __always_inline int atomic_sub_return(int i, atomic_t *v)
 	return atomic_add_return(-i, v);
 }
 
+static __always_inline int atomic_sub_return_wrap(int i, atomic_wrap_t *v)
+{
+	return atomic_add_return_wrap(-i, v);
+}
+#define atomic_sub_return_wrap atomic_sub_return_wrap
+
 #define atomic_inc_return(v)  (atomic_add_return(1, v))
+static __always_inline int atomic_inc_return_wrap(atomic_wrap_t *v)
+{
+	return atomic_add_return_wrap(1, v);
+}
+#define atomic_inc_return_wrap atomic_inc_return_wrap
+
 #define atomic_dec_return(v)  (atomic_sub_return(1, v))
+static __always_inline int atomic_dec_return_wrap(atomic_wrap_t *v)
+{
+	return atomic_sub_return_wrap(1, v);
+}
+#define atomic_dec_return_wrap atomic_dec_return_wrap
 
 static __always_inline int atomic_fetch_add(int i, atomic_t *v)
 {
+	return xadd_check_overflow(&v->counter, i);
+}
+
+static __always_inline int atomic_fetch_add_wrap(int i, atomic_wrap_t *v)
+{
 	return xadd(&v->counter, i);
 }
+#define atomic_fetch_add_wrap atomic_fetch_add_wrap
 
 static __always_inline int atomic_fetch_sub(int i, atomic_t *v)
 {
+	return xadd_check_overflow(&v->counter, -i);
+}
+
+static __always_inline int atomic_fetch_sub_wrap(int i, atomic_wrap_t *v)
+{
 	return xadd(&v->counter, -i);
 }
+#define atomic_fetch_sub_wrap atomic_fetch_sub_wrap
 
 static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	return cmpxchg(&v->counter, old, new);
 }
 
+static __always_inline int atomic_cmpxchg_wrap(atomic_wrap_t *v, int old, int new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+#define atomic_cmpxchg_wrap atomic_cmpxchg_wrap
+
 static inline int atomic_xchg(atomic_t *v, int new)
 {
 	return xchg(&v->counter, new);
 }
 
+static inline int atomic_xchg_wrap(atomic_wrap_t *v, int new)
+{
+	return xchg(&v->counter, new);
+}
+#define atomic_xchg_wrap atomic_xchg_wrap
+
 #define ATOMIC_OP(op)							\
 static inline void atomic_##op(int i, atomic_t *v)			\
 {									\
@@ -236,12 +444,25 @@  ATOMIC_OPS(xor, ^)
  */
 static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
-	int c, old;
+	int c, old, new;
 	c = atomic_read(v);
 	for (;;) {
 		if (unlikely(c == (u)))
 			break;
-		old = atomic_cmpxchg((v), c, c + (a));
+
+		asm volatile("addl %2,%0\n"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+			     "jno 0f\n"
+			     "subl %2,%0\n"
+			     "int $4\n0:\n"
+			     _ASM_EXTABLE(0b, 0b)
+#endif
+
+			     : "=r" (new)
+			     : "0" (c), "ir" (a));
+
+		old = atomic_cmpxchg((v), c, new);
 		if (likely(old == c))
 			break;
 		c = old;
@@ -250,6 +471,80 @@  static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
 }
 
 /**
+ * atomic_add_unless_wrap - add unless the number is already a given value
+ * @v: pointer of type atomic_wrap_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as @v was not already @u.
+ * Returns the old value of @v.
+ */
+static __always_inline int atomic_add_unless_wrap(atomic_wrap_t *v,
+						    int a, int u)
+{
+	int c, old, new;
+	c = atomic_read_wrap(v);
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+
+		asm volatile("addl %2,%0\n"
+			     : "=r" (new)
+			     : "0" (c), "ir" (a));
+
+		old = atomic_cmpxchg_wrap((v), c, new);
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c != u;
+}
+#define atomic_add_unless_wrap atomic_add_unless_wrap
+
+/**
++ * atomic_inc_not_zero_hint - increment if not null
++ * @v: pointer of type atomic_t
++ * @hint: probable value of the atomic before the increment
++ *
++ * This version of atomic_inc_not_zero() gives a hint of probable
++ * value of the atomic. This helps processor to not read the memory
++ * before doing the atomic read/modify/write cycle, lowering
++ * number of bus transactions on some arches.
++ *
++ * Returns: 0 if increment was not done, 1 otherwise.
++ */
+#define atomic_inc_not_zero_hint atomic_inc_not_zero_hint
+static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint)
+{
+	int val, c = hint, new;
+
+	/* sanity test, should be removed by compiler if hint is a constant */
+	if (!hint)
+		return __atomic_add_unless(v, 1, 0);
+
+	do {
+		asm volatile("incl %0\n"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+			     "jno 0f\n"
+			     "decl %0\n"
+			     "int $4\n0:\n"
+			     _ASM_EXTABLE(0b, 0b)
+#endif
+
+			     : "=r" (new)
+			     : "0" (c));
+
+		val = atomic_cmpxchg((v), c, new);
+		if (val == c)
+			return 1;
+		c = val;
+	} while (c);
+
+	return 0;
+}
+
+/**
  * atomic_inc_short - increment of a short integer
  * @v: pointer to type int
  *
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 71d7705..a9281e8 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -11,6 +11,13 @@  typedef struct {
 	u64 __aligned(8) counter;
 } atomic64_t;
 
+#ifndef atomic64_wrap_t
+#define atomic64_wrap_t atomic64_wrap_t
+typedef struct {
+	u64 __aligned(8) counter;
+} atomic64_wrap_t;
+#endif
+
 #define ATOMIC64_INIT(val)	{ (val) }
 
 #define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
@@ -36,21 +43,31 @@  typedef struct {
 	ATOMIC64_DECL_ONE(sym##_386)
 
 ATOMIC64_DECL_ONE(add_386);
+ATOMIC64_DECL_ONE(add_wrap_386);
 ATOMIC64_DECL_ONE(sub_386);
+ATOMIC64_DECL_ONE(sub_wrap_386);
 ATOMIC64_DECL_ONE(inc_386);
+ATOMIC64_DECL_ONE(inc_wrap_386);
 ATOMIC64_DECL_ONE(dec_386);
+ATOMIC64_DECL_ONE(dec_wrap_386);
 #endif
 
 #define alternative_atomic64(f, out, in...) \
 	__alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in)
 
 ATOMIC64_DECL(read);
+ATOMIC64_DECL(read_wrap);
 ATOMIC64_DECL(set);
+ATOMIC64_DECL(set_wrap);
 ATOMIC64_DECL(xchg);
 ATOMIC64_DECL(add_return);
+ATOMIC64_DECL(add_return_wrap);
 ATOMIC64_DECL(sub_return);
+ATOMIC64_DECL(sub_return_wrap);
 ATOMIC64_DECL(inc_return);
+ATOMIC64_DECL(inc_return_wrap);
 ATOMIC64_DECL(dec_return);
+ATOMIC64_DECL(dec_return_wrap);
 ATOMIC64_DECL(dec_if_positive);
 ATOMIC64_DECL(inc_not_zero);
 ATOMIC64_DECL(add_unless);
@@ -76,6 +93,22 @@  static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n
 }
 
 /**
+ * atomic64_cmpxchg_wrap - cmpxchg atomic64 variable
+ * @p: pointer to type atomic64_wrap_t
+ * @o: expected value
+ * @n: new value
+ *
+ * Atomically sets @v to @n if it was equal to @o and returns
+ * the old value.
+ */
+
+static inline long long atomic64_cmpxchg_wrap(atomic64_wrap_t *v, long long o, long long n)
+{
+	return cmpxchg64(&v->counter, o, n);
+}
+#define atomic64_cmpxchg_wrap atomic64_cmpxchg_wrap
+
+/**
  * atomic64_xchg - xchg atomic64 variable
  * @v: pointer to type atomic64_t
  * @n: value to assign
@@ -95,6 +128,26 @@  static inline long long atomic64_xchg(atomic64_t *v, long long n)
 }
 
 /**
+ * atomic64_xchg_wrap - xchg atomic64 variable
+ * @v: pointer to type atomic64_wrap_t
+ * @n: value to assign
+ *
+ * Atomically xchgs the value of @v to @n and returns
+ * the old value.
+ */
+static inline long long atomic64_xchg_wrap(atomic64_wrap_t *v, long long n)
+{
+	long long o;
+	unsigned high = (unsigned)(n >> 32);
+	unsigned low = (unsigned)n;
+	alternative_atomic64(xchg, "=&A" (o),
+			     "S" (v), "b" (low), "c" (high)
+			     : "memory");
+	return o;
+}
+#define atomic64_xchg_wrap atomic64_xchg_wrap
+
+/**
  * atomic64_set - set atomic64 variable
  * @v: pointer to type atomic64_t
  * @i: value to assign
@@ -111,6 +164,22 @@  static inline void atomic64_set(atomic64_t *v, long long i)
 }
 
 /**
+ * atomic64_set_wrap - set atomic64 variable
+ * @v: pointer to type atomic64_wrap_t
+ * @n: value to assign
+ *
+ * Atomically sets the value of @v to @n.
+ */
+static inline void atomic64_set_wrap(atomic64_wrap_t *v, long long i)
+{
+	unsigned high = (unsigned)(i >> 32);
+	unsigned low = (unsigned)i;
+	alternative_atomic64(set, /* no output */,
+			     "S" (v), "b" (low), "c" (high)
+			     : "eax", "edx", "memory");
+}
+
+/**
  * atomic64_read - read atomic64 variable
  * @v: pointer to type atomic64_t
  *
@@ -121,7 +190,20 @@  static inline long long atomic64_read(const atomic64_t *v)
 	long long r;
 	alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
 	return r;
- }
+}
+
+/**
+ * atomic64_read_wrap - read atomic64 variable
+ * @v: pointer to type atomic64_wrap_t
+ *
+ * Atomically reads the value of @v and returns it.
+ */
+static inline long long atomic64_read_wrap(const atomic64_wrap_t *v)
+{
+	long long r;
+	alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
+	return r;
+}
 
 /**
  * atomic64_add_return - add and return
@@ -138,6 +220,21 @@  static inline long long atomic64_add_return(long long i, atomic64_t *v)
 	return i;
 }
 
+/**
+ * atomic64_add_return_wrap - add and return
+ * @i: integer value to add
+ * @v: pointer to type atomic64_wrap_t
+ *
+ * Atomically adds @i to @v and returns @i + *@v
+ */
+static inline long long atomic64_add_return_wrap(long long i, atomic64_wrap_t *v)
+{
+	alternative_atomic64(add_return_wrap,
+			     ASM_OUTPUT2("+A" (i), "+c" (v)),
+			     ASM_NO_INPUT_CLOBBER("memory"));
+	return i;
+}
+
 /*
  * Other variants with different arithmetic operators:
  */
@@ -149,6 +246,14 @@  static inline long long atomic64_sub_return(long long i, atomic64_t *v)
 	return i;
 }
 
+static inline long long atomic64_sub_return_wrap(long long i, atomic64_wrap_t *v)
+{
+	alternative_atomic64(sub_return_wrap,
+			     ASM_OUTPUT2("+A" (i), "+c" (v)),
+			     ASM_NO_INPUT_CLOBBER("memory"));
+	return i;
+}
+
 static inline long long atomic64_inc_return(atomic64_t *v)
 {
 	long long a;
@@ -157,6 +262,14 @@  static inline long long atomic64_inc_return(atomic64_t *v)
 	return a;
 }
 
+static inline long long atomic64_inc_return_wrap(atomic64_wrap_t *v)
+{
+	long long a;
+	alternative_atomic64(inc_return_wrap, "=&A" (a),
+			     "S" (v) : "memory", "ecx");
+	return a;
+}
+
 static inline long long atomic64_dec_return(atomic64_t *v)
 {
 	long long a;
@@ -165,6 +278,14 @@  static inline long long atomic64_dec_return(atomic64_t *v)
 	return a;
 }
 
+static inline long long atomic64_dec_return_wrap(atomic64_wrap_t *v)
+{
+	long long a;
+	alternative_atomic64(dec_return_wrap, "=&A" (a),
+			     "S" (v) : "memory", "ecx");
+	return a;
+}
+
 /**
  * atomic64_add - add integer to atomic64 variable
  * @i: integer value to add
@@ -179,6 +300,23 @@  static inline long long atomic64_add(long long i, atomic64_t *v)
 			       ASM_NO_INPUT_CLOBBER("memory"));
 	return i;
 }
+#define atomic64_add_wrap atomic64_add_wrap
+
+
+/**
+ * atomic64_add_wrap - add integer to atomic64 variable
+ * @i: integer value to add
+ * @v: pointer to type atomic64_wrap_t
+ *
+ * Atomically adds @i to @v.
+ */
+static inline long long atomic64_add_wrap(long long i, atomic64_wrap_t *v)
+{
+	__alternative_atomic64(add_wrap, add_return_wrap,
+			       ASM_OUTPUT2("+A" (i), "+c" (v)),
+			       ASM_NO_INPUT_CLOBBER("memory"));
+	return i;
+}
 
 /**
  * atomic64_sub - subtract the atomic64 variable
@@ -196,6 +334,22 @@  static inline long long atomic64_sub(long long i, atomic64_t *v)
 }
 
 /**
+ * atomic64_sub_wrap - subtract the atomic64 variable
+ * @i: integer value to subtract
+ * @v: pointer to type atomic64_wrap_t
+ *
+ * Atomically subtracts @i from @v.
+ */
+static inline long long atomic64_sub_wrap(long long i, atomic64_wrap_t *v)
+{
+	__alternative_atomic64(sub_wrap, sub_return_wrap,
+			       ASM_OUTPUT2("+A" (i), "+c" (v)),
+			       ASM_NO_INPUT_CLOBBER("memory"));
+	return i;
+}
+#define atomic64_sub_wrap atomic64_sub_wrap
+
+/**
  * atomic64_sub_and_test - subtract value from variable and test result
  * @i: integer value to subtract
  * @v: pointer to type atomic64_t
@@ -209,6 +363,13 @@  static inline int atomic64_sub_and_test(long long i, atomic64_t *v)
 	return atomic64_sub_return(i, v) == 0;
 }
 
+static inline int atomic64_sub_and_test_wrap(long long i, atomic64_wrap_t *v)
+{
+	return atomic64_sub_return_wrap(i, v) == 0;
+}
+#define atomic64_sub_and_test_wrap atomic64_sub_and_test_wrap
+
+
 /**
  * atomic64_inc - increment atomic64 variable
  * @v: pointer to type atomic64_t
@@ -222,6 +383,19 @@  static inline void atomic64_inc(atomic64_t *v)
 }
 
 /**
+ * atomic64_inc_wrap - increment atomic64 variable
+ * @v: pointer to type atomic64_wrap_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic64_inc_wrap(atomic64_wrap_t *v)
+{
+	__alternative_atomic64(inc_wrap, inc_return_wrap, /* no output */,
+			       "S" (v) : "memory", "eax", "ecx", "edx");
+}
+#define atomic64_inc_wrap atomic64_inc_wrap
+
+/**
  * atomic64_dec - decrement atomic64 variable
  * @v: pointer to type atomic64_t
  *
@@ -234,6 +408,19 @@  static inline void atomic64_dec(atomic64_t *v)
 }
 
 /**
+ * atomic64_dec_wrap - decrement atomic64 variable
+ * @v: pointer to type atomic64_wrap_t
+ *
+ * Atomically decrements @v by 1.
+ */
+static inline void atomic64_dec_wrap(atomic64_wrap_t *v)
+{
+	__alternative_atomic64(dec_wrap, dec_return_wrap, /* no output */,
+			       "S" (v) : "memory", "eax", "ecx", "edx");
+}
+#define atomic64_dec_wrap atomic64_dec_wrap
+
+/**
  * atomic64_dec_and_test - decrement and test
  * @v: pointer to type atomic64_t
  *
@@ -246,6 +433,13 @@  static inline int atomic64_dec_and_test(atomic64_t *v)
 	return atomic64_dec_return(v) == 0;
 }
 
+static inline int atomic64_dec_and_test_wrap(atomic64_wrap_t *v)
+{
+	return atomic64_dec_return_wrap(v) == 0;
+}
+#define atomic64_dec_and_test_wrap atomic64_dec_and_test_wrap
+
+
 /**
  * atomic64_inc_and_test - increment and test
  * @v: pointer to type atomic64_t
@@ -259,6 +453,12 @@  static inline int atomic64_inc_and_test(atomic64_t *v)
 	return atomic64_inc_return(v) == 0;
 }
 
+static inline int atomic64_inc_and_test_wrap(atomic64_wrap_t *v)
+{
+	return atomic64_inc_return_wrap(v) == 0;
+}
+#define atomic64_inc_and_test_wrap atomic64_inc_and_test_wrap
+
 /**
  * atomic64_add_negative - add and test if negative
  * @i: integer value to add
@@ -273,6 +473,12 @@  static inline int atomic64_add_negative(long long i, atomic64_t *v)
 	return atomic64_add_return(i, v) < 0;
 }
 
+static inline int atomic64_add_negative_wrap(long long i, atomic64_wrap_t *v)
+{
+	return atomic64_add_return_wrap(i, v) < 0;
+}
+#define atomic64_add_negative_wrap atomic64_add_negative_wrap
+
 /**
  * atomic64_add_unless - add unless the number is a given value
  * @v: pointer of type atomic64_t
@@ -292,6 +498,25 @@  static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 	return (int)a;
 }
 
+/**
+ * atomic64_add_unless_wrap - add unless the number is a given value
+ * @v: pointer of type atomic64_wrap_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if the add was done, zero otherwise.
+ */
+static inline int atomic64_add_unless_wrap(atomic64_wrap_t *v, long long a, long long u)
+{
+	unsigned low = (unsigned)u;
+	unsigned high = (unsigned)(u >> 32);
+	alternative_atomic64(add_unless,
+			     ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)),
+			     "S" (v) : "memory");
+	return (int)a;
+}
+#define atomic64_add_unless_wrap atomic64_add_unless_wrap
 
 static inline int atomic64_inc_not_zero(atomic64_t *v)
 {
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 89ed2f6..c218ead 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -5,6 +5,13 @@ 
 #include <asm/alternative.h>
 #include <asm/cmpxchg.h>
 
+#ifndef atomic64_wrap_t
+#define atomic64_wrap_t atomic64_wrap_t
+typedef struct {
+	long counter;
+} atomic64_wrap_t;
+#endif
+
 /* The 64-bit atomic type */
 
 #define ATOMIC64_INIT(i)	{ (i) }
@@ -22,6 +29,19 @@  static inline long atomic64_read(const atomic64_t *v)
 }
 
 /**
+ * atomic64_read_wrap - read atomic64 variable
+ * @v: pointer of type atomic64_wrap_t
+ *
+ * Atomically reads the value of @v.
+ * Doesn't imply a read memory barrier.
+ */
+static inline long atomic64_read_wrap(const atomic64_wrap_t *v)
+{
+	return ACCESS_ONCE((v)->counter);
+}
+#define atomic64_read_wrap atomic64_read_wrap
+
+/**
  * atomic64_set - set atomic64 variable
  * @v: pointer to type atomic64_t
  * @i: required value
@@ -34,6 +54,19 @@  static inline void atomic64_set(atomic64_t *v, long i)
 }
 
 /**
+ * atomic64_set_wrap - set atomic64 variable
+ * @v: pointer to type atomic64_wrap_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic64_set_wrap(atomic64_wrap_t *v, long i)
+{
+	v->counter = i;
+}
+#define atomic64_set_wrap atomic64_set_wrap
+
+/**
  * atomic64_add - add integer to atomic64 variable
  * @i: integer value to add
  * @v: pointer to type atomic64_t
@@ -42,10 +75,33 @@  static inline void atomic64_set(atomic64_t *v, long i)
  */
 static __always_inline void atomic64_add(long i, atomic64_t *v)
 {
+	asm volatile(LOCK_PREFIX "addq %1,%0\n"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     LOCK_PREFIX "subq %1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "=m" (v->counter)
+		     : "er" (i), "m" (v->counter));
+}
+
+/**
+ * atomic64_add_wrap - add integer to atomic64 variable
+ * @i: integer value to add
+ * @v: pointer to type atomic64_wrap_t
+ *
+ * Atomically adds @i to @v.
+ */
+static __always_inline void atomic64_add_wrap(long i, atomic64_wrap_t *v)
+{
 	asm volatile(LOCK_PREFIX "addq %1,%0"
 		     : "=m" (v->counter)
 		     : "er" (i), "m" (v->counter));
 }
+#define atomic64_add_wrap atomic64_add_wrap
 
 /**
  * atomic64_sub - subtract the atomic64 variable
@@ -56,10 +112,31 @@  static __always_inline void atomic64_add(long i, atomic64_t *v)
  */
 static inline void atomic64_sub(long i, atomic64_t *v)
 {
+	asm volatile(LOCK_PREFIX "subq %1,%0\n"
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     LOCK_PREFIX "addq %1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+		     : "=m" (v->counter)
+		     : "er" (i), "m" (v->counter));
+}
+
+/**
++ * atomic64_sub_wrap - subtract the atomic64 variable
++ * @i: integer value to subtract
++ * @v: pointer to type atomic64_wrap_t
++ *
++ * Atomically subtracts @i from @v.
++ */
+static inline void atomic64_sub_wrap(long i, atomic64_wrap_t *v)
+{
 	asm volatile(LOCK_PREFIX "subq %1,%0"
 		     : "=m" (v->counter)
 		     : "er" (i), "m" (v->counter));
 }
+#define atomic64_sub_wrap atomic64_sub_wrap
 
 /**
  * atomic64_sub_and_test - subtract value from variable and test result
@@ -72,10 +149,25 @@  static inline void atomic64_sub(long i, atomic64_t *v)
  */
 static inline bool atomic64_sub_and_test(long i, atomic64_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", LOCK_PREFIX "addq", v->counter, "er", i, "%0", e);
 }
 
 /**
+ * atomic64_sub_and_test_wrap - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer to type atomic64_wrap_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline bool atomic64_sub_and_test_wrap(long i, atomic64_wrap_t *v)
+{
+	GEN_BINARY_RMWcc_wrap(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
+}
+#define atomic64_sub_and_test_wrap atomic64_sub_and_test_wrap
+
+/**
  * atomic64_inc - increment atomic64 variable
  * @v: pointer to type atomic64_t
  *
@@ -83,10 +175,31 @@  static inline bool atomic64_sub_and_test(long i, atomic64_t *v)
  */
 static __always_inline void atomic64_inc(atomic64_t *v)
 {
+	asm volatile(LOCK_PREFIX "incq %0\n"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     LOCK_PREFIX "decq %0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+		     : "=m" (v->counter)
+		     : "m" (v->counter));
+}
+
+/**
+ * atomic64_inc_wrap - increment atomic64 variable
+ * @v: pointer to type atomic64_wrap_t
+ *
+ * Atomically increments @v by 1.
+ */
+static __always_inline void atomic64_inc_wrap(atomic64_wrap_t *v)
+{
 	asm volatile(LOCK_PREFIX "incq %0"
 		     : "=m" (v->counter)
 		     : "m" (v->counter));
 }
+#define atomic64_inc_wrap atomic64_inc_wrap
 
 /**
  * atomic64_dec - decrement atomic64 variable
@@ -96,10 +209,31 @@  static __always_inline void atomic64_inc(atomic64_t *v)
  */
 static __always_inline void atomic64_dec(atomic64_t *v)
 {
+	asm volatile(LOCK_PREFIX "decq %0\n"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     LOCK_PREFIX "incq %0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+		     : "=m" (v->counter)
+		     : "m" (v->counter));
+}
+
+/**
+ * atomic64_dec_wrap - decrement atomic64 variable
+ * @v: pointer to type atomic64_wrap_t
+ *
+ * Atomically decrements @v by 1.
+ */
+static __always_inline void atomic64_dec_wrap(atomic64_wrap_t *v)
+{
 	asm volatile(LOCK_PREFIX "decq %0"
 		     : "=m" (v->counter)
 		     : "m" (v->counter));
 }
+#define atomic64_dec_wrap atomic64_dec_wrap
 
 /**
  * atomic64_dec_and_test - decrement and test
@@ -111,8 +245,14 @@  static __always_inline void atomic64_dec(atomic64_t *v)
  */
 static inline bool atomic64_dec_and_test(atomic64_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", LOCK_PREFIX "incq", v->counter, "%0", e);
+}
+
+static inline bool atomic64_dec_and_test_wrap(atomic64_wrap_t *v)
+{
+	GEN_UNARY_RMWcc_wrap(LOCK_PREFIX "decq", v->counter, "%0", e);
 }
+#define atomic64_dec_and_test_wrap atomic64_dec_and_test_wrap
 
 /**
  * atomic64_inc_and_test - increment and test
@@ -124,8 +264,14 @@  static inline bool atomic64_dec_and_test(atomic64_t *v)
  */
 static inline bool atomic64_inc_and_test(atomic64_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", LOCK_PREFIX "decq", v->counter, "%0", e);
+}
+
+static inline bool atomic64_inc_and_test_wrap(atomic64_wrap_t *v)
+{
+	GEN_UNARY_RMWcc_wrap(LOCK_PREFIX "incq", v->counter, "%0", e);
 }
+#define atomic64_inc_and_test_wrap atomic64_inc_and_test_wrap
 
 /**
  * atomic64_add_negative - add and test if negative
@@ -138,9 +284,15 @@  static inline bool atomic64_inc_and_test(atomic64_t *v)
  */
 static inline bool atomic64_add_negative(long i, atomic64_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", LOCK_PREFIX "subq", v->counter, "er", i, "%0", s);
 }
 
+static inline bool atomic64_add_negative_wrap(long i, atomic64_wrap_t *v)
+{
+	GEN_BINARY_RMWcc_wrap(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
+}
+#define atomic64_add_negative_wrap atomic64_add_negative_wrap
+
 /**
  * atomic64_add_return - add and return
  * @i: integer value to add
@@ -150,14 +302,26 @@  static inline bool atomic64_add_negative(long i, atomic64_t *v)
  */
 static __always_inline long atomic64_add_return(long i, atomic64_t *v)
 {
+	return i + xadd_check_overflow(&v->counter, i);
+}
+
+static __always_inline long atomic64_add_return_wrap(long i, atomic64_wrap_t *v)
+{
 	return i + xadd(&v->counter, i);
 }
+#define atomic64_add_return_wrap atomic64_add_return_wrap
 
 static inline long atomic64_sub_return(long i, atomic64_t *v)
 {
 	return atomic64_add_return(-i, v);
 }
 
+static inline long atomic64_sub_return_wrap(long i, atomic64_wrap_t *v)
+{
+	return atomic64_add_return_wrap(-i, v);
+}
+#define atomic64_sub_return_wrap atomic64_sub_return_wrap
+
 static inline long atomic64_fetch_add(long i, atomic64_t *v)
 {
 	return xadd(&v->counter, i);
@@ -171,16 +335,31 @@  static inline long atomic64_fetch_sub(long i, atomic64_t *v)
 #define atomic64_inc_return(v)  (atomic64_add_return(1, (v)))
 #define atomic64_dec_return(v)  (atomic64_sub_return(1, (v)))
 
+#define atomic64_inc_return_wrap(v)  (atomic64_add_return_wrap(1, (v)))
+#define atomic64_dec_return_wrap(v)  (atomic64_sub_return_wrap(1, (v)))
+
 static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
 {
 	return cmpxchg(&v->counter, old, new);
 }
 
+static inline long atomic64_cmpxchg_wrap(atomic64_wrap_t *v, long old, long new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+#define atomic64_cmpxchg_wrap atomic64_cmpxchg_wrap
+
 static inline long atomic64_xchg(atomic64_t *v, long new)
 {
 	return xchg(&v->counter, new);
 }
 
+static inline long atomic64_xchg_wrap(atomic64_wrap_t *v, long new)
+{
+	return xchg(&v->counter, new);
+}
+#define atomic64_xchg_wrap atomic64_xchg_wrap
+
 /**
  * atomic64_add_unless - add unless the number is a given value
  * @v: pointer of type atomic64_t
@@ -192,11 +371,21 @@  static inline long atomic64_xchg(atomic64_t *v, long new)
  */
 static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 {
-	long c, old;
+	long c, old, new;
 	c = atomic64_read(v);
 	for (;;) {
 		if (unlikely(c == (u)))
 			break;
+		asm volatile("add %2,%0\n"
+#ifdef CONFIG_HARDENED_ATOMIC
+			     "jno 0f\n"
+			     "sub %2,%0\n"
+			     "int $4\n0:\n"
+			     _ASM_EXTABLE(0b, 0b)
+#endif
+			     : "=r" (new)
+			     : "0" (c), "ir" (a));
+
 		old = atomic64_cmpxchg((v), c, c + (a));
 		if (likely(old == c))
 			break;
@@ -205,6 +394,26 @@  static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 	return c != (u);
 }
 
+static inline bool atomic64_add_unless_wrap(atomic64_wrap_t *v, long a, long u)
+{
+	long c, old, new;
+	c = atomic64_read_wrap(v);
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+		asm volatile("add %2,%0\n"
+			     : "=r" (new)
+			     : "0" (c), "ir" (a));
+
+		old = atomic64_cmpxchg_wrap((v), c, c + (a));
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c != (u);
+}
+#define atomic64_add_unless_wrap atomic64_add_unless_wrap
+
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
 
 /*
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 68557f52..e25eb0d 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -50,7 +50,7 @@ 
  * a mask operation on a byte.
  */
 #define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
-#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((volatile void *)(addr) + ((nr)>>3))
 #define CONST_MASK(nr)			(1 << ((nr) & 7))
 
 /**
@@ -203,7 +203,7 @@  static __always_inline void change_bit(long nr, volatile unsigned long *addr)
  */
 static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c);
+	GEN_BINARY_RMWcc_wrap(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c);
 }
 
 /**
@@ -249,7 +249,7 @@  static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
  */
 static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c);
+	GEN_BINARY_RMWcc_wrap(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c);
 }
 
 /**
@@ -302,7 +302,7 @@  static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
  */
 static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c);
+	GEN_BINARY_RMWcc_wrap(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c);
 }
 
 static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 97848cd..456446fe 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -13,10 +13,14 @@  extern void __xchg_wrong_size(void)
 	__compiletime_error("Bad argument size for xchg");
 extern void __cmpxchg_wrong_size(void)
 	__compiletime_error("Bad argument size for cmpxchg");
+extern void __xadd_check_overflow_wrong_size(void)
+	__compiletime_error("Bad argument size for xadd_check_overflow");
 extern void __xadd_wrong_size(void)
 	__compiletime_error("Bad argument size for xadd");
 extern void __add_wrong_size(void)
 	__compiletime_error("Bad argument size for add");
+extern void __add_check_overflow_wrong_size(void)
+	__compiletime_error("Bad argument size for add_check_overflow");
 
 /*
  * Constants for operation sizes. On 32-bit, the 64-bit size is set to
@@ -68,6 +72,38 @@  extern void __add_wrong_size(void)
 		__ret;							\
 	})
 
+#ifdef CONFIG_HARDENED_ATOMIC
+#define __xchg_op_check_overflow(ptr, arg, op, lock)			\
+	({								\
+	        __typeof__ (*(ptr)) __ret = (arg);			\
+		switch (sizeof(*(ptr))) {				\
+		case __X86_CASE_L:					\
+			asm volatile (lock #op "l %0, %1\n"		\
+				      "jno 0f\n"			\
+				      "mov %0,%1\n"			\
+				      "int $4\n0:\n"			\
+				      _ASM_EXTABLE(0b, 0b)		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_Q:					\
+			asm volatile (lock #op "q %q0, %1\n"		\
+				      "jno 0f\n"			\
+				      "mov %0,%1\n"			\
+				      "int $4\n0:\n"			\
+				      _ASM_EXTABLE(0b, 0b)		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		default:						\
+			__ ## op ## _check_overflow_wrong_size();	\
+		}							\
+		__ret;							\
+	})
+#else
+#define __xchg_op_check_overflow(ptr, arg, op, lock) __xchg_op(ptr, arg, op, lock)
+#endif
+
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
  * Since this is generally used to protect other memory information, we
@@ -162,6 +198,9 @@  extern void __add_wrong_size(void)
 #define __xadd(ptr, inc, lock)	__xchg_op((ptr), (inc), xadd, lock)
 #define xadd(ptr, inc)		__xadd((ptr), (inc), LOCK_PREFIX)
 
+#define __xadd_check_overflow(ptr, inc, lock)	__xchg_op_check_overflow((ptr), (inc), xadd, lock)
+#define xadd_check_overflow(ptr, inc)		__xadd_check_overflow((ptr), (inc), LOCK_PREFIX)
+
 #define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2)			\
 ({									\
 	bool __ret;							\
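
xadd_check_overflow() reads like a checked xadd: it still returns the old
value, but on signed overflow it writes the old value back (undoing the
add) and raises #OF. A compilable userspace approximation of the
__X86_CASE_Q expansion, with the LOCK prefix and the _ASM_EXTABLE fixup
dropped since they only make sense in the kernel (x86-64 only, function
name is mine):

#include <stdio.h>

/* Rough rendering of __xchg_op_check_overflow() for a 64-bit long;
 * without the exception-table fixup the int $4 would simply kill the
 * process instead of being handled by the kernel. */
static long xadd_check_overflow_sketch(long *ptr, long inc)
{
	long ret = inc;

	asm volatile("xaddq %q0, %1\n"
		     "jno 0f\n"
		     "mov %0, %1\n"	/* overflow: restore the old value */
		     "int $4\n"
		     "0:\n"
		     : "+r" (ret), "+m" (*ptr)
		     : : "memory", "cc");
	return ret;			/* old value, as with plain xadd */
}

int main(void)
{
	long counter = 40;
	long old = xadd_check_overflow_sketch(&counter, 2);

	printf("old=%ld new=%ld\n", old, counter);	/* old=40 new=42 */
	return 0;
}
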
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index b6ab86c..4da1531 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -10,27 +10,69 @@  typedef struct {
 	atomic_long_t a;
 } local_t;
 
-#include <asm-generic/local_wrap.h>
+typedef struct {
+	atomic_long_wrap_t a;
+} local_wrap_t;
 
 #define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }
 
 #define local_read(l)	atomic_long_read(&(l)->a)
+#define local_read_wrap(l)	atomic_long_read_wrap(&(l)->a)
 #define local_set(l, i)	atomic_long_set(&(l)->a, (i))
+#define local_set_wrap(l, i)	atomic_long_set_wrap(&(l)->a, (i))
 
 static inline void local_inc(local_t *l)
 {
+	asm volatile(_ASM_INC "%0\n"
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     _ASM_DEC "%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+		     : "+m" (l->a.counter));
+}
+
+static inline void local_inc_wrap(local_wrap_t *l)
+{
 	asm volatile(_ASM_INC "%0"
 		     : "+m" (l->a.counter));
 }
 
 static inline void local_dec(local_t *l)
 {
+	asm volatile(_ASM_DEC "%0\n"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     _ASM_INC "%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+		     : "+m" (l->a.counter));
+}
+
+static inline void local_dec_wrap(local_wrap_t *l)
+{
 	asm volatile(_ASM_DEC "%0"
 		     : "+m" (l->a.counter));
 }
 
 static inline void local_add(long i, local_t *l)
 {
+	asm volatile(_ASM_ADD "%1,%0\n"
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     _ASM_SUB "%1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+		     : "+m" (l->a.counter)
+		     : "ir" (i));
+}
+
+static inline void local_add_wrap(long i, local_wrap_t *l)
+{
 	asm volatile(_ASM_ADD "%1,%0"
 		     : "+m" (l->a.counter)
 		     : "ir" (i));
@@ -38,6 +80,19 @@  static inline void local_add(long i, local_t *l)
 
 static inline void local_sub(long i, local_t *l)
 {
+	asm volatile(_ASM_SUB "%1,%0\n"
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     _ASM_ADD "%1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+		     : "+m" (l->a.counter)
+		     : "ir" (i));
+}
+
+static inline void local_sub_wrap(long i, local_wrap_t *l)
+{
 	asm volatile(_ASM_SUB "%1,%0"
 		     : "+m" (l->a.counter)
 		     : "ir" (i));
@@ -54,7 +109,12 @@  static inline void local_sub(long i, local_t *l)
  */
 static inline bool local_sub_and_test(long i, local_t *l)
 {
-	GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e);
+	GEN_BINARY_RMWcc(_ASM_SUB, _ASM_ADD, l->a.counter, "er", i, "%0", e);
+}
+
+static inline bool local_sub_and_test_wrap(long i, local_wrap_t *l)
+{
+	GEN_BINARY_RMWcc_wrap(_ASM_SUB, l->a.counter, "er", i, "%0", e);
 }
 
 /**
@@ -67,7 +127,12 @@  static inline bool local_sub_and_test(long i, local_t *l)
  */
 static inline bool local_dec_and_test(local_t *l)
 {
-	GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e);
+	GEN_UNARY_RMWcc(_ASM_DEC, _ASM_INC, l->a.counter, "%0", e);
+}
+
+static inline bool local_dec_and_test_wrap(local_wrap_t *l)
+{
+	GEN_UNARY_RMWcc_wrap(_ASM_DEC, l->a.counter, "%0", e);
 }
 
 /**
@@ -80,7 +145,12 @@  static inline bool local_dec_and_test(local_t *l)
  */
 static inline bool local_inc_and_test(local_t *l)
 {
-	GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e);
+	GEN_UNARY_RMWcc(_ASM_INC, _ASM_DEC, l->a.counter, "%0", e);
+}
+
+static inline bool local_inc_and_test_wrap(local_wrap_t *l)
+{
+	GEN_UNARY_RMWcc_wrap(_ASM_INC, l->a.counter, "%0", e);
 }
 
 /**
@@ -94,7 +164,12 @@  static inline bool local_inc_and_test(local_t *l)
  */
 static inline bool local_add_negative(long i, local_t *l)
 {
-	GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s);
+	GEN_BINARY_RMWcc(_ASM_ADD, _ASM_SUB, l->a.counter, "er", i, "%0", s);
+}
+
+static inline bool local_add_negative_wrap(long i, local_wrap_t *l)
+{
+	GEN_BINARY_RMWcc_wrap(_ASM_ADD, l->a.counter, "er", i, "%0", s);
 }
 
 /**
@@ -107,6 +182,28 @@  static inline bool local_add_negative(long i, local_t *l)
 static inline long local_add_return(long i, local_t *l)
 {
 	long __i = i;
+	asm volatile(_ASM_XADD "%0, %1\n"
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     _ASM_MOV "%0,%1\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+		     : "+r" (i), "+m" (l->a.counter)
+		     : : "memory");
+	return i + __i;
+}
+
+/**
+ * local_add_return_wrap - add and return
+ * @i: integer value to add
+ * @l: pointer to type local_wrap_t
+ *
+ * Atomically adds @i to @l and returns @i + @l
+ */
+static inline long local_add_return_wrap(long i, local_wrap_t *l)
+{
+	long __i = i;
 	asm volatile(_ASM_XADD "%0, %1;"
 		     : "+r" (i), "+m" (l->a.counter)
 		     : : "memory");
@@ -118,11 +215,20 @@  static inline long local_sub_return(long i, local_t *l)
 	return local_add_return(-i, l);
 }
 
+static inline long local_sub_return_wrap(long i, local_wrap_t *l)
+{
+	return local_add_return_wrap(-i, l);
+}
+
 #define local_inc_return(l)  (local_add_return(1, l))
 #define local_dec_return(l)  (local_sub_return(1, l))
+#define local_inc_return_wrap(l)  (local_add_return_wrap(1, l))
+#define local_dec_return_wrap(l)  (local_sub_return_wrap(1, l))
 
 #define local_cmpxchg(l, o, n) \
 	(cmpxchg_local(&((l)->a.counter), (o), (n)))
+#define local_cmpxchg_wrap(l, o, n) \
+	(cmpxchg_local(&((l)->a.counter), (o), (n)))
 /* Always has a lock prefix */
 #define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
 
@@ -135,20 +241,47 @@  static inline long local_sub_return(long i, local_t *l)
  * Atomically adds @a to @l, so long as it was not @u.
  * Returns non-zero if @l was not @u, and zero otherwise.
  */
-#define local_add_unless(l, a, u)				\
+static inline long local_add_unless(local_t *l, long a, long u)
+{
+	long c, old, new;
+	c = local_read((l));
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+
+		asm volatile(_ASM_ADD "%2,%0\n"
+#ifdef CONFIG_HARDENED_ATOMIC
+			     "jno 0f\n"
+			     _ASM_SUB "%2,%0\n"
+			     "int $4\n0:\n"
+			     _ASM_EXTABLE(0b, 0b)
+#endif /* CONFIG_HARDENED_ATOMIC */
+			     : "=r" (new)
+			     : "0" (c), "ir" (a));
+
+		old = local_cmpxchg((l), c, new);
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c != (u);
+}
+
+#define local_add_unless_wrap(l, a, u)				\
 ({								\
 	long c, old;						\
-	c = local_read((l));					\
+	c = local_read_wrap((l));				\
 	for (;;) {						\
 		if (unlikely(c == (u)))				\
 			break;					\
-		old = local_cmpxchg((l), c, c + (a));		\
+		old = local_cmpxchg_wrap((l), c, c + (a));	\
 		if (likely(old == c))				\
 			break;					\
 		c = old;					\
 	}							\
 	c != (u);						\
 })
+
 #define local_inc_not_zero(l) local_add_unless((l), 1, 0)
 
 /* On x86_32, these are no better than the atomic variants.
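
local_add_unless() is open-coded above so that the new value can be
overflow-checked before local_cmpxchg() publishes it. The same control
flow in portable C, as a userspace sketch (GCC atomic builtins stand in
for local_cmpxchg(), abort() stands in for the int $4 path, and the
function name is mine):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Userspace sketch of the local_add_unless() loop: the candidate value
 * is computed (and overflow-checked) before the cmpxchg publishes it. */
static bool add_unless_sketch(long *l, long a, long u)
{
	long c = __atomic_load_n(l, __ATOMIC_RELAXED);

	for (;;) {
		long new;

		if (c == u)
			break;
		if (__builtin_add_overflow(c, a, &new))
			abort();	/* stands in for int $4 / BUG() */
		if (__atomic_compare_exchange_n(l, &c, new, false,
						__ATOMIC_RELAXED,
						__ATOMIC_RELAXED))
			break;
		/* on failure c now holds the current value; retry */
	}
	return c != u;
}

int main(void)
{
	long v = 1;

	printf("%d %ld\n", add_unless_sketch(&v, 1, 0), v);	/* 1 2 */
	printf("%d %ld\n", add_unless_sketch(&v, 1, 2), v);	/* 0 2 */
	return 0;
}
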
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 17f2186..2fa0e84 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -81,7 +81,7 @@  static __always_inline void __preempt_count_sub(int val)
  */
 static __always_inline bool __preempt_count_dec_and_test(void)
 {
-	GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e);
+	GEN_UNARY_RMWcc("decl", "incl", __preempt_count, __percpu_arg(0), e);
 }
 
 /*
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 661dd30..0375d3f 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -5,28 +5,80 @@ 
 
 /* Use asm goto */
 
-#define __GEN_RMWcc(fullop, var, cc, ...)				\
+#ifdef CONFIG_HARDENED_ATOMIC
+#define __GEN_RMWcc(fullop, fullantiop, var, cc, ...)			\
 do {									\
-	asm_volatile_goto (fullop "; j" #cc " %l[cc_label]"		\
+	asm_volatile_goto (fullop					\
+			";jno 0f\n"					\
+			fullantiop					\
+			";int $4\n0:\n"					\
+			_ASM_EXTABLE(0b, 0b)				\
+			 ";j" #cc " %l[cc_label]"			\
 			: : "m" (var), ## __VA_ARGS__ 			\
 			: "memory" : cc_label);				\
 	return 0;							\
 cc_label:								\
 	return 1;							\
 } while (0)
+#else
+#define __GEN_RMWcc(fullop, fullantiop, var, cc, ...)			\
+do {									\
+	asm_volatile_goto (fullop ";j" #cc " %l[cc_label]"		\
+			: : "m" (var), ## __VA_ARGS__ 			\
+			: "memory" : cc_label);				\
+	return 0;							\
+cc_label:								\
+	return 1;							\
+} while (0)
+#endif
 
-#define GEN_UNARY_RMWcc(op, var, arg0, cc) 				\
-	__GEN_RMWcc(op " " arg0, var, cc)
-
-#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
-	__GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val))
+#define __GEN_RMWcc_wrap(fullop, var, cc, ...)				\
+do {									\
+	asm_volatile_goto (fullop "; j" #cc " %l[cc_label]"		\
+			: : "m" (var), ## __VA_ARGS__ 			\
+			: "memory" : cc_label);				\
+	return 0;							\
+cc_label:								\
+	return 1;							\
+} while (0)
 
+#define GEN_UNARY_RMWcc(op, antiop, var, arg0, cc) 			\
+	__GEN_RMWcc(op " " arg0, antiop " " arg0, var, cc)
+#define GEN_UNARY_RMWcc_wrap(op, var, arg0, cc) 			\
+	__GEN_RMWcc_wrap(op " " arg0, var, cc)
+#define GEN_BINARY_RMWcc(op, antiop, var, vcon, val, arg0, cc)		\
+	__GEN_RMWcc(op " %1, " arg0, antiop " %1, " arg0, var, cc, vcon (val))
+#define GEN_BINARY_RMWcc_wrap(op, var, vcon, val, arg0, cc)	\
+	__GEN_RMWcc_wrap(op " %1, " arg0, var, cc, vcon (val))
 #else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
 
 /* Use flags output or a set instruction */
 
-#define __GEN_RMWcc(fullop, var, cc, ...)				\
+#ifdef CONFIG_HARDENED_ATOMIC
+#define __GEN_RMWcc(fullop, fullantiop, var, cc, ...)			\
 do {									\
+	char c;								\
+	asm volatile (fullop 						\
+			";jno 0f\n"					\
+			fullantiop					\
+			";int $4\n0:\n"					\
+			_ASM_EXTABLE(0b, 0b)				\
+			";" CC_SET(cc)				\
+			: "+m" (var), CC_OUT(cc) (c)			\
+			: __VA_ARGS__ : "memory");			\
+	return c != 0;							\
+} while (0)
+#else
+#define __GEN_RMWcc(fullop, fullantiop, var, cc, ...)			\
+do {									\
+	char c;								\
+	asm volatile (fullop ";" CC_SET(cc)				\
+			: "+m" (var), CC_OUT(cc) (c)			\
+			: __VA_ARGS__ : "memory");			\
+	return c != 0;							\
+} while (0)
+#endif
+
+#define __GEN_RMWcc_wrap(fullop, var, cc, ...)				\
+do {									\
 	bool c;								\
 	asm volatile (fullop ";" CC_SET(cc)				\
 			: "+m" (var), CC_OUT(cc) (c)			\
@@ -34,12 +86,14 @@  do {									\
 	return c;							\
 } while (0)
 
-#define GEN_UNARY_RMWcc(op, var, arg0, cc)				\
-	__GEN_RMWcc(op " " arg0, var, cc)
-
-#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
-	__GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val))
-
+#define GEN_UNARY_RMWcc(op, antiop, var, arg0, cc)			\
+	__GEN_RMWcc(op " " arg0, antiop " " arg0, var, cc)
+#define GEN_UNARY_RMWcc_wrap(op, var, arg0, cc)			\
+	__GEN_RMWcc_wrap(op " " arg0, var, cc)
+#define GEN_BINARY_RMWcc(op, antiop, var, vcon, val, arg0, cc)		\
+	__GEN_RMWcc(op " %2, " arg0, antiop " %2, " arg0, var, cc, vcon (val))
+#define GEN_BINARY_RMWcc_wrap(op, var, vcon, val, arg0, cc)	\
+	__GEN_RMWcc_wrap(op " %2, " arg0, var, cc, vcon (val))
 #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
 
 #endif /* _ASM_X86_RMWcc */
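
With asm goto, GEN_UNARY_RMWcc("decl", "incl", var, arg0, e) under
CONFIG_HARDENED_ATOMIC expands to roughly the shape below, shown here as
a standalone userspace function operating on a plain int (no LOCK prefix
and no _ASM_EXTABLE fixup, so the int $4 would be fatal here; this is a
sketch, not the literal preprocessor output):

#include <stdbool.h>
#include <stdio.h>

/* Approximate expansion of the hardened GEN_UNARY_RMWcc as used by
 * local_dec_and_test(): decrement, undo and trap on signed overflow,
 * then branch on the zero flag. */
static bool dec_and_test_sketch(int *counter)
{
	asm goto("decl %0\n"
		 "jno 0f\n"
		 "incl %0\n"		/* undo on overflow */
		 "int $4\n"
		 "0:\n"
		 "je %l[cc_label]"
		 : : "m" (*counter)
		 : "memory", "cc"
		 : cc_label);
	return false;
cc_label:
	return true;
}

int main(void)
{
	int c = 2;

	printf("%d %d\n", dec_and_test_sketch(&c), c);	/* 0 1 */
	printf("%d %d\n", dec_and_test_sketch(&c), c);	/* 1 0 */
	return 0;
}
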
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index a34e0d4..05e69df 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -64,6 +64,14 @@  static inline void __down_read(struct rw_semaphore *sem)
 {
 	asm volatile("# beginning down_read\n\t"
 		     LOCK_PREFIX _ASM_INC "(%1)\n\t"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     LOCK_PREFIX _ASM_DEC "(%1)\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     /* adds 0x00000001 */
 		     "  jns        1f\n"
 		     "  call call_rwsem_down_read_failed\n"
@@ -85,6 +93,14 @@  static inline bool __down_read_trylock(struct rw_semaphore *sem)
 		     "1:\n\t"
 		     "  mov          %1,%2\n\t"
 		     "  add          %3,%2\n\t"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     "sub %3,%2\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     "  jle	     2f\n\t"
 		     LOCK_PREFIX "  cmpxchg  %2,%0\n\t"
 		     "  jnz	     1b\n\t"
@@ -99,6 +115,15 @@  static inline bool __down_read_trylock(struct rw_semaphore *sem)
 /*
  * lock for writing
  */
+#ifdef CONFIG_HARDENED_ATOMIC
+#define ____down_write_undo \
+		     "jno 0f\n"\
+		     "mov %1,(%2)\n"\
+		     "int $4\n0:\n"\
+		     _ASM_EXTABLE(0b, 0b)
+#else
+#define ____down_write_undo
+#endif
 #define ____down_write(sem, slow_path)			\
 ({							\
 	long tmp;					\
@@ -107,6 +132,7 @@  static inline bool __down_read_trylock(struct rw_semaphore *sem)
 							\
 	asm volatile("# beginning down_write\n\t"	\
 		     LOCK_PREFIX "  xadd      %1,(%4)\n\t"	\
+		     ____down_write_undo		\
 		     /* adds 0xffff0001, returns the old value */ \
 		     "  test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
 		     /* was the active mask 0 before? */\
@@ -168,6 +194,14 @@  static inline void __up_read(struct rw_semaphore *sem)
 	long tmp;
 	asm volatile("# beginning __up_read\n\t"
 		     LOCK_PREFIX "  xadd      %1,(%2)\n\t"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     "mov %1,(%2)\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     /* subtracts 1, returns the old value */
 		     "  jns        1f\n\t"
 		     "  call call_rwsem_wake\n" /* expects old value in %edx */
@@ -186,6 +220,14 @@  static inline void __up_write(struct rw_semaphore *sem)
 	long tmp;
 	asm volatile("# beginning __up_write\n\t"
 		     LOCK_PREFIX "  xadd      %1,(%2)\n\t"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     "mov %1,(%2)\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     /* subtracts 0xffff0001, returns the old value */
 		     "  jns        1f\n\t"
 		     "  call call_rwsem_wake\n" /* expects old value in %edx */
@@ -203,6 +245,14 @@  static inline void __downgrade_write(struct rw_semaphore *sem)
 {
 	asm volatile("# beginning __downgrade_write\n\t"
 		     LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t"
+
+#ifdef CONFIG_HARDENED_ATOMIC
+		     "jno 0f\n"
+		     LOCK_PREFIX _ASM_SUB "%2,(%1)\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     /*
 		      * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
 		      *     0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bd4e3d4..d67a914 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -191,6 +191,10 @@  do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 			tsk->thread.trap_nr = trapnr;
 			die(str, regs, error_code);
 		}
+
+		if (trapnr == X86_TRAP_OF)
+			hardened_atomic_overflow(regs);
+
 		return 0;
 	}
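
hardened_atomic_overflow() is not defined in this patch; it comes from
the generic HARDENED_ATOMIC infrastructure earlier in the series. For
orientation only, a minimal sketch of what such a handler could look
like (my assumption, not the actual definition from that patch):

#include <linux/bug.h>
#include <linux/printk.h>
#include <linux/ptrace.h>
#include <linux/sched.h>

/* Sketch only: the real helper is added by the generic HARDENED_ATOMIC
 * patch in this series, not by this x86 patch. */
static inline void hardened_atomic_overflow(struct pt_regs *regs)
{
	pr_emerg("HARDENED_ATOMIC: overflow detected in %s[%d]\n",
		 current->comm, task_pid_nr(current));
	BUG();
}
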
 
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 9b0ca8f..0e8a888 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -45,6 +45,10 @@  BEGIN(read)
 	movl  (v), %eax
 	movl 4(v), %edx
 RET_ENDP
+BEGIN(read_wrap)
+	movl  (v), %eax
+	movl 4(v), %edx
+RET_ENDP
 #undef v
 
 #define v %esi
@@ -52,6 +56,10 @@  BEGIN(set)
 	movl %ebx,  (v)
 	movl %ecx, 4(v)
 RET_ENDP
+BEGIN(set_wrap)
+	movl %ebx,  (v)
+	movl %ecx, 4(v)
+RET_ENDP
 #undef v
 
 #define v  %esi
@@ -67,6 +75,18 @@  RET_ENDP
 BEGIN(add)
 	addl %eax,  (v)
 	adcl %edx, 4(v)
+#ifdef CONFIG_HARDENED_ATOMIC
+	jno 0f
+	subl %eax,  (v)
+	sbbl %edx, 4(v)
+	int $4
+0:
+	_ASM_EXTABLE(0b, 0b)
+#endif
+RET_ENDP
+BEGIN(add_wrap)
+	addl %eax,  (v)
+	adcl %edx, 4(v)
 RET_ENDP
 #undef v
 
@@ -74,6 +94,20 @@  RET_ENDP
 BEGIN(add_return)
 	addl  (v), %eax
 	adcl 4(v), %edx
+#ifdef CONFIG_HARDENED_ATOMIC
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
+	movl %eax,  (v)
+	movl %edx, 4(v)
+#ifdef CONFIG_HARDENED_ATOMIC
+2:
+#endif
+RET_ENDP
+BEGIN(add_return_wrap)
+	addl  (v), %eax
+	adcl 4(v), %edx
 	movl %eax,  (v)
 	movl %edx, 4(v)
 RET_ENDP
@@ -83,6 +117,18 @@  RET_ENDP
 BEGIN(sub)
 	subl %eax,  (v)
 	sbbl %edx, 4(v)
+#ifdef CONFIG_HARDENED_ATOMIC
+	jno 0f
+	addl %eax,  (v)
+	adcl %edx, 4(v)
+	int $4
+0:
+	_ASM_EXTABLE(0b, 0b)
+#endif
+RET_ENDP
+BEGIN(sub_wrap)
+	subl %eax,  (v)
+	sbbl %edx, 4(v)
 RET_ENDP
 #undef v
 
@@ -93,6 +139,23 @@  BEGIN(sub_return)
 	sbbl $0, %edx
 	addl  (v), %eax
 	adcl 4(v), %edx
+#ifdef CONFIG_HARDENED_ATOMIC
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
+	movl %eax,  (v)
+	movl %edx, 4(v)
+#ifdef CONFIG_HARDENED_ATOMIC
+2:
+#endif
+RET_ENDP
+BEGIN(sub_return_wrap)
+	negl %edx
+	negl %eax
+	sbbl $0, %edx
+	addl  (v), %eax
+	adcl 4(v), %edx
 	movl %eax,  (v)
 	movl %edx, 4(v)
 RET_ENDP
@@ -102,6 +165,19 @@  RET_ENDP
 BEGIN(inc)
 	addl $1,  (v)
 	adcl $0, 4(v)
+#ifdef CONFIG_HARDENED_ATOMIC
+	jno 0f
+	subl $1,  (v)
+	sbbl $0, 4(v)
+	int $4
+0:
+	_ASM_EXTABLE(0b, 0b)
+#endif
+
+RET_ENDP
+BEGIN(inc_wrap)
+	addl $1,  (v)
+	adcl $0, 4(v)
 RET_ENDP
 #undef v
 
@@ -111,6 +187,22 @@  BEGIN(inc_return)
 	movl 4(v), %edx
 	addl $1, %eax
 	adcl $0, %edx
+#ifdef CONFIG_HARDENED_ATOMIC
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
+	movl %eax,  (v)
+	movl %edx, 4(v)
+#ifdef CONFIG_HARDENED_ATOMIC
+2:
+#endif
+RET_ENDP
+BEGIN(inc_return_wrap)
+	movl  (v), %eax
+	movl 4(v), %edx
+	addl $1, %eax
+	adcl $0, %edx
 	movl %eax,  (v)
 	movl %edx, 4(v)
 RET_ENDP
@@ -120,6 +212,18 @@  RET_ENDP
 BEGIN(dec)
 	subl $1,  (v)
 	sbbl $0, 4(v)
+#ifdef CONFIG_HARDENED_ATOMIC
+	jno 0f
+	addl $1,  (v)
+	adcl $0, 4(v)
+	int $4
+0:
+	_ASM_EXTABLE(0b, 0b)
+#endif
+RET_ENDP
+BEGIN(dec_wrap)
+	subl $1,  (v)
+	sbbl $0, 4(v)
 RET_ENDP
 #undef v
 
@@ -129,6 +233,22 @@  BEGIN(dec_return)
 	movl 4(v), %edx
 	subl $1, %eax
 	sbbl $0, %edx
+#ifdef CONFIG_HARDENED_ATOMIC
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
+	movl %eax,  (v)
+	movl %edx, 4(v)
+#ifdef CONFIG_HARDENED_ATOMIC
+2:
+#endif
+RET_ENDP
+BEGIN(dec_return_wrap)
+	movl  (v), %eax
+	movl 4(v), %edx
+	subl $1, %eax
+	sbbl $0, %edx
 	movl %eax,  (v)
 	movl %edx, 4(v)
 RET_ENDP
@@ -140,6 +260,11 @@  BEGIN(add_unless)
 	adcl %edx, %edi
 	addl  (v), %eax
 	adcl 4(v), %edx
+#ifdef CONFIG_HARDENED_ATOMIC
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
 	cmpl %eax, %ecx
 	je 3f
 1:
@@ -165,6 +290,11 @@  BEGIN(inc_not_zero)
 1:
 	addl $1, %eax
 	adcl $0, %edx
+#ifdef CONFIG_HARDENED_ATOMIC
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
 	movl %eax,  (v)
 	movl %edx, 4(v)
 	movl $1, %eax
@@ -183,6 +313,11 @@  BEGIN(dec_if_positive)
 	movl 4(v), %edx
 	subl $1, %eax
 	sbbl $0, %edx
+#ifdef CONFIG_HARDENED_ATOMIC
+	into
+1234:
+	_ASM_EXTABLE(1234b, 1f)
+#endif
 	js 1f
 	movl %eax,  (v)
 	movl %edx, 4(v)
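
The *_return paths in the 386/486 fallback above build the new value in
registers, check for overflow with "into", and only then store to
memory; the exception fixup resumes past the store, so the counter in
memory is never left wrapped. The same ordering as a userspace C sketch
(names and __builtin_add_overflow are mine):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Userspace sketch of the add_return path: compute first, check, and
 * only write back a non-overflowing result.  abort() stands in for the
 * into / exception-fixup path. */
static int64_t add_return_sketch(int64_t *v, int64_t i)
{
	int64_t sum;

	if (__builtin_add_overflow(*v, i, &sum))
		abort();	/* the patch skips the store and traps */
	*v = sum;
	return sum;
}

int main(void)
{
	int64_t c = INT64_MAX - 3;

	printf("%lld\n", (long long)add_return_sketch(&c, 2));
	return 0;
}
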
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index db3ae854..5bd864e 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -22,9 +22,19 @@ 
 
 ENTRY(atomic64_read_cx8)
 	read64 %ecx
+	/* PaX has pax_force_retaddr here.
+	 * Do we want something similar? If so, changes
+	 * have to be made in more places below. */
 	ret
 ENDPROC(atomic64_read_cx8)
 
+ENTRY(atomic64_read_wrap_cx8)
+	read64 %ecx
+/* do we want something like the line below?
+ *	pax_force_retaddr */
+	ret
+ENDPROC(atomic64_read_wrap_cx8)
+
 ENTRY(atomic64_set_cx8)
 1:
 /* we don't need LOCK_PREFIX since aligned 64-bit writes
@@ -35,6 +45,17 @@  ENTRY(atomic64_set_cx8)
 	ret
 ENDPROC(atomic64_set_cx8)
 
+ENTRY(atomic64_set_wrap_cx8)
+1:
+/* we don't need LOCK_PREFIX since aligned 64-bit writes
+ * are atomic on 586 and newer */
+	cmpxchg8b (%esi)
+	jne 1b
+
+	/* pax_force_retaddr */
+	ret
+ENDPROC(atomic64_set_wrap_cx8)
+
 ENTRY(atomic64_xchg_cx8)
 1:
 	LOCK_PREFIX
@@ -44,8 +65,8 @@  ENTRY(atomic64_xchg_cx8)
 	ret
 ENDPROC(atomic64_xchg_cx8)
 
-.macro addsub_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
+.macro addsub_return func ins insc wrap=""
+ENTRY(atomic64_\func\()_return\wrap\()_cx8)
 	pushl %ebp
 	pushl %ebx
 	pushl %esi
@@ -61,6 +82,13 @@  ENTRY(atomic64_\func\()_return_cx8)
 	movl %edx, %ecx
 	\ins\()l %esi, %ebx
 	\insc\()l %edi, %ecx
+#ifdef CONFIG_HARDENED_ATOMIC
+.ifb \wrap
+	into
+2:
+	_ASM_EXTABLE(2b, 3f)
+.endif
+#endif
 	LOCK_PREFIX
 	cmpxchg8b (%ebp)
 	jne 1b
@@ -68,19 +96,27 @@  ENTRY(atomic64_\func\()_return_cx8)
 10:
 	movl %ebx, %eax
 	movl %ecx, %edx
+
+.ifb \wrap
+#ifdef CONFIG_HARDENED_ATOMIC
+3:
+#endif
+.endif
 	popl %edi
 	popl %esi
 	popl %ebx
 	popl %ebp
 	ret
-ENDPROC(atomic64_\func\()_return_cx8)
+ENDPROC(atomic64_\func\()_return\wrap\()_cx8)
 .endm
 
 addsub_return add add adc
 addsub_return sub sub sbb
+addsub_return add add adc _wrap
+addsub_return sub sub sbb _wrap
 
-.macro incdec_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
+.macro incdec_return func ins insc wrap=""
+ENTRY(atomic64_\func\()_return\wrap\()_cx8)
 	pushl %ebx
 
 	read64 %esi
@@ -89,6 +125,13 @@  ENTRY(atomic64_\func\()_return_cx8)
 	movl %edx, %ecx
 	\ins\()l $1, %ebx
 	\insc\()l $0, %ecx
+#ifdef CONFIG_HARDENED_ATOMIC
+.ifb \wrap
+	into
+2:
+	_ASM_EXTABLE(2b, 3f)
+.endif
+#endif
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
 	jne 1b
@@ -96,13 +139,21 @@  ENTRY(atomic64_\func\()_return_cx8)
 10:
 	movl %ebx, %eax
 	movl %ecx, %edx
+
+.ifb \wrap
+#ifdef CONFIG_HARDENED_ATOMIC
+3:
+#endif
+.endif
 	popl %ebx
 	ret
-ENDPROC(atomic64_\func\()_return_cx8)
+ENDPROC(atomic64_\func\()_return\wrap\()_cx8)
 .endm
 
 incdec_return inc add adc
 incdec_return dec sub sbb
+incdec_return inc add adc _wrap
+incdec_return dec sub sbb _wrap
 
 ENTRY(atomic64_dec_if_positive_cx8)
 	pushl %ebx
@@ -113,6 +164,11 @@  ENTRY(atomic64_dec_if_positive_cx8)
 	movl %edx, %ecx
 	subl $1, %ebx
 	sbb $0, %ecx
+#ifdef CONFIG_HARDENED_ATOMIC
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
 	js 2f
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
@@ -144,6 +200,11 @@  ENTRY(atomic64_add_unless_cx8)
 	movl %edx, %ecx
 	addl %ebp, %ebx
 	adcl %edi, %ecx
+#ifdef CONFIG_HARDENED_ATOMIC
+	into
+1234:
+	_ASM_EXTABLE(1234b, 3f)
+#endif
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
 	jne 1b
@@ -173,6 +234,11 @@  ENTRY(atomic64_inc_not_zero_cx8)
 	xorl %ecx, %ecx
 	addl $1, %ebx
 	adcl %edx, %ecx
+#ifdef CONFIG_HARDENED_ATOMIC
+	into
+1234:
+	_ASM_EXTABLE(1234b, 3f)
+#endif
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
 	jne 1b