
[v2,bpf-next,10/13] bpf: Add instructions for atomic[64]_[fetch_]sub

Message ID: 20201127175738.1085417-11-jackmanb@google.com
State: Changes Requested
Delegated to: BPF
Series: Atomics for eBPF

Checks

Context                         Check    Description
netdev/cover_letter             success
netdev/fixes_present            success
netdev/patch_count              success
netdev/tree_selection           success  Clearly marked for bpf-next
netdev/subject_prefix           success
netdev/source_inline            success  Was 0 now: 0
netdev/verify_signedoff         success
netdev/module_param             success  Was 0 now: 0
netdev/build_32bit              fail     Errors and warnings before: 4938 this patch: 4939
netdev/kdoc                     success  Errors and warnings before: 0 this patch: 0
netdev/verify_fixes             success
netdev/checkpatch               warning  WARNING: line length of 109 exceeds 80 columns
                                         WARNING: line length of 83 exceeds 80 columns
                                         WARNING: line length of 85 exceeds 80 columns
                                         WARNING: line length of 89 exceeds 80 columns
                                         WARNING: line length of 90 exceeds 80 columns
                                         WARNING: line length of 91 exceeds 80 columns
netdev/build_allmodconfig_warn  fail     Errors and warnings before: 5306 this patch: 5307
netdev/header_inline            success
netdev/stable                   success  Stable not CCed

Commit Message

Brendan Jackman Nov. 27, 2020, 5:57 p.m. UTC
Including only interpreter and x86 JIT support.

x86 doesn't provide an atomic exchange-and-subtract instruction that
could be used for BPF_SUB | BPF_FETCH; however, we can just emit a NEG
followed by an XADD to get the same effect.
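
For illustration (not part of the patch), the equivalence the JIT
relies on, in plain C: fetch-and-sub is just fetch-and-add of the
negated operand.

	#include <stdatomic.h>

	/* Returns the old value, same as atomic_fetch_sub(p, v):
	 * negate the operand, then atomically fetch-and-add it.
	 */
	static long fetch_sub_via_add(_Atomic long *p, long v)
	{
		return atomic_fetch_add(p, -v);
	}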

Signed-off-by: Brendan Jackman <jackmanb@google.com>
---
 arch/x86/net/bpf_jit_comp.c  | 16 ++++++++++++++--
 include/linux/filter.h       | 20 ++++++++++++++++++++
 kernel/bpf/core.c            |  1 +
 kernel/bpf/disasm.c          | 16 ++++++++++++----
 kernel/bpf/verifier.c        |  2 ++
 tools/include/linux/filter.h | 20 ++++++++++++++++++++
 6 files changed, 69 insertions(+), 6 deletions(-)

Comments

kernel test robot Nov. 27, 2020, 9:39 p.m. UTC | #1
Hi Brendan,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Brendan-Jackman/Atomics-for-eBPF/20201128-020057
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: i386-randconfig-s001-20201127 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-151-g540c2c4b-dirty
        # https://github.com/0day-ci/linux/commit/8b1823a5cf4569c72046175d217e3e2ad68c6a05
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Brendan-Jackman/Atomics-for-eBPF/20201128-020057
        git checkout 8b1823a5cf4569c72046175d217e3e2ad68c6a05
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 

If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot <lkp@intel.com>


"sparse warnings: (new ones prefixed by >>)"
>> kernel/bpf/disasm.c:83:12: sparse: sparse: symbol 'bpf_atomic_alu_string' was not declared. Should it be static?

Please review and possibly fold the followup patch.
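
For reference, a hypothetical one-liner matching the warning (the
actual followup patch is not shown here):

	-const char *const bpf_atomic_alu_string[16] = {
	+static const char *const bpf_atomic_alu_string[16] = {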

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Yonghong Song Nov. 28, 2020, 5:35 a.m. UTC | #2
On 11/27/20 9:57 AM, Brendan Jackman wrote:
> Including only interpreter and x86 JIT support.
> 
> x86 doesn't provide an atomic exchange-and-subtract instruction that
> could be used for BPF_SUB | BPF_FETCH; however, we can just emit a NEG
> followed by an XADD to get the same effect.
> 
> Signed-off-by: Brendan Jackman <jackmanb@google.com>
> ---
>   arch/x86/net/bpf_jit_comp.c  | 16 ++++++++++++++--
>   include/linux/filter.h       | 20 ++++++++++++++++++++
>   kernel/bpf/core.c            |  1 +
>   kernel/bpf/disasm.c          | 16 ++++++++++++----
>   kernel/bpf/verifier.c        |  2 ++
>   tools/include/linux/filter.h | 20 ++++++++++++++++++++
>   6 files changed, 69 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 7431b2937157..a8a9fab13fcf 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -823,6 +823,7 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
>   
>   	/* emit opcode */
>   	switch (atomic_op) {
> +	case BPF_SUB:
>   	case BPF_ADD:
>   		/* lock *(u32/u64*)(dst_reg + off) <op>= src_reg */
>   		EMIT1(simple_alu_opcodes[atomic_op]);
> @@ -1306,8 +1307,19 @@ st:			if (is_imm8(insn->off))
>   
>   		case BPF_STX | BPF_ATOMIC | BPF_W:
>   		case BPF_STX | BPF_ATOMIC | BPF_DW:
> -			err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
> -					  insn->off, BPF_SIZE(insn->code));
> +			if (insn->imm == (BPF_SUB | BPF_FETCH)) {
> +				/*
> +				 * x86 doesn't have an XSUB insn, so we negate
> +				 * and XADD instead.
> +				 */
> +				emit_neg(&prog, src_reg, BPF_SIZE(insn->code) == BPF_DW);
> +				err = emit_atomic(&prog, BPF_ADD | BPF_FETCH,
> +						  dst_reg, src_reg, insn->off,
> +						  BPF_SIZE(insn->code));
> +			} else {
> +				err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
> +						  insn->off, BPF_SIZE(insn->code));
> +			}
>   			if (err)
>   				return err;
>   			break;
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 6186280715ed..a20a3a536bf5 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -280,6 +280,26 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
>   		.off   = OFF,					\
>   		.imm   = BPF_ADD | BPF_FETCH })
>   
> +/* Atomic memory sub, *(uint *)(dst_reg + off16) -= src_reg */
> +
> +#define BPF_ATOMIC_SUB(SIZE, DST, SRC, OFF)			\
> +	((struct bpf_insn) {					\
> +		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC,	\
> +		.dst_reg = DST,					\
> +		.src_reg = SRC,					\
> +		.off   = OFF,					\
> +		.imm   = BPF_SUB })

Currently, llvm does not support XSUB; should we support it in llvm?
At the source level, as implemented in the JIT, the user can just do a
negate followed by an xadd.
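
A minimal sketch of that source-level pattern (illustrative; assumes
clang's __sync_fetch_and_add builtin for BPF):

	/* Hypothetical BPF C: fetch-and-sub built from negate plus the
	 * existing fetch-and-add builtin, with no XSUB instruction.
	 */
	static long fetch_sub(long *counter, long val)
	{
		return __sync_fetch_and_add(counter, -val);
	}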

> +
> +/* Atomic memory sub with fetch, src_reg = atomic_fetch_sub(*(dst_reg + off), src_reg); */
> +
> +#define BPF_ATOMIC_FETCH_SUB(SIZE, DST, SRC, OFF)		\
> +	((struct bpf_insn) {					\
> +		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC,	\
> +		.dst_reg = DST,					\
> +		.src_reg = SRC,					\
> +		.off   = OFF,					\
> +		.imm   = BPF_SUB | BPF_FETCH })
> +
>   /* Atomic exchange, src_reg = atomic_xchg((dst_reg + off), src_reg) */
>   
>   #define BPF_ATOMIC_XCHG(SIZE, DST, SRC, OFF)			\
[...]
Alexei Starovoitov Nov. 29, 2020, 1:34 a.m. UTC | #3
On Fri, Nov 27, 2020 at 09:35:07PM -0800, Yonghong Song wrote:
> On 11/27/20 9:57 AM, Brendan Jackman wrote:
[...]
> Currently, llvm does not support XSUB; should we support it in llvm?
> At the source level, as implemented in the JIT, the user can just do a
> negate followed by an xadd.

I forgot we have BPF_NEG insn :)
Indeed it's probably easier to handle the atomic_fetch_sub() builtin
completely on the llvm side. It can generate bpf_neg followed by atomic_fetch_add.
No need to burden the verifier, interpreter and JITs with it.
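
A sketch of the two-insn sequence such lowering would produce, using
this series' macro names (BPF_ATOMIC_FETCH_ADD is assumed from the
earlier patches; illustrative only):

	/* r2 = atomic_fetch_sub((u64 *)(r1 + 0), r2) becomes: */
	BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),			/* r2 = -r2 */
	BPF_ATOMIC_FETCH_ADD(BPF_DW, BPF_REG_1, BPF_REG_2, 0),	/* old value into r2 */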
Yonghong Song Nov. 30, 2020, 5:18 p.m. UTC | #4
On 11/28/20 5:34 PM, Alexei Starovoitov wrote:
> On Fri, Nov 27, 2020 at 09:35:07PM -0800, Yonghong Song wrote:
>> On 11/27/20 9:57 AM, Brendan Jackman wrote:
[...]
>>
>> Currently, llvm does not support XSUB; should we support it in llvm?
>> At the source level, as implemented in the JIT, the user can just do a
>> negate followed by an xadd.
> 
> I forgot we have BPF_NEG insn :)
> Indeed it's probably easier to handle the atomic_fetch_sub() builtin
> completely on the llvm side. It can generate bpf_neg followed by atomic_fetch_add.

Just tried. llvm selectiondag won't be able to automatically
convert atomic_fetch_sub to neg + atomic_fetch_add. So there
will be a need in BPFInstrInfo.td to match the atomic_fetch_sub IR
pattern (a test-case sketch follows below). I will experiment with
this together with xsub.

> No need to burden the verifier, interpreter and JITs with it.
>
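
A hypothetical C test case for that llvm-side work (illustrative; this
builtin produces the atomicrmw sub IR pattern that BPFInstrInfo.td
would need to match):

	/* Should lower to a BPF_NEG followed by an atomic fetch-add. */
	long test_fetch_sub(long *p, long v)
	{
		return __sync_fetch_and_sub(p, v);
	}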
Brendan Jackman Dec. 1, 2020, 12:38 p.m. UTC | #5
On Mon, Nov 30, 2020 at 09:18:09AM -0800, Yonghong Song wrote:
> On 11/28/20 5:34 PM, Alexei Starovoitov wrote:
> > On Fri, Nov 27, 2020 at 09:35:07PM -0800, Yonghong Song wrote:
> > > On 11/27/20 9:57 AM, Brendan Jackman wrote:
[...]
> > > > +#define BPF_ATOMIC_SUB(SIZE, DST, SRC, OFF)			\
> > > > +	((struct bpf_insn) {					\
> > > > +		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC,	\
> > > > +		.dst_reg = DST,					\
> > > > +		.src_reg = SRC,					\
> > > > +		.off   = OFF,					\
> > > > +		.imm   = BPF_SUB })
> > > 
> > > Currently, llvm does not support XSUB; should we support it in llvm?
> > > At the source level, as implemented in the JIT, the user can just do
> > > a negate followed by an xadd.
> > 
> > I forgot we have BPF_NEG insn :)
> > Indeed it's probably easier to handle the atomic_fetch_sub() builtin
> > completely on the llvm side. It can generate bpf_neg followed by atomic_fetch_add.
> 
> Just tried. llvm selectiondag won't be able to automatically
> convert atomic_fetch_sub to neg + atomic_fetch_add. So there
> will be a need in BPFInstrInfo.td to match the atomic_fetch_sub IR
> pattern. I will experiment with this together with xsub.
> 
> > No need to burden the verifier, interpreter and JITs with it.
> > 

I guess it's also worth remembering other archs might have an atomic
subtract.
Alexei Starovoitov Dec. 2, 2020, 5:55 a.m. UTC | #6
On Tue, Dec 1, 2020 at 4:38 AM Brendan Jackman <jackmanb@google.com> wrote:
>
> I guess it's also worth remembering other archs might have an atomic
> subtract.

which one?
arm64 LSE implements atomic_fetch_sub as neg+ldadd.
imo the x64 and arm64 examples outweigh the choices of other archs, if there are such.
Even without LSE it will be a neg+llsc loop.
The reason I proposed a bpf xsub insn earlier is that I thought llvm
wouldn't be able to emit it so easily and the JIT/verifier would struggle.
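
The arm64 point as a C sketch (illustrative; the commented asm is the
neg+ldadd sequence described above):

	#include <stdatomic.h>

	long fetch_sub64(_Atomic long *p, long v)
	{
		/* arm64 LSE: neg x1, x1; ldaddal x1, x0, [x0]
		 * i.e. the same neg + fetch-add shape as x86's NEG + XADD.
		 */
		return atomic_fetch_sub(p, v);
	}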
Brendan Jackman Dec. 2, 2020, 11:19 a.m. UTC | #7
On Tue, Dec 01, 2020 at 09:55:22PM -0800, Alexei Starovoitov wrote:
> On Tue, Dec 1, 2020 at 4:38 AM Brendan Jackman <jackmanb@google.com> wrote:
> >
> > I guess it's also worth remembering other archs might have an atomic
> > subtract.
> 
> which one?
> arm64 LSE implements atomic_fetch_sub as neg+ldadd.
> imo the x64 and arm64 examples outweigh the choices of other archs, if there are such.
> Even without LSE it will be a neg+llsc loop.
> The reason I proposed a bpf xsub insn earlier is that I thought llvm
> wouldn't be able to emit it so easily and the JIT/verifier would struggle.

Ack, I'll drop the atomic subtract instruction.

Patch

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7431b2937157..a8a9fab13fcf 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -823,6 +823,7 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
 
 	/* emit opcode */
 	switch (atomic_op) {
+	case BPF_SUB:
 	case BPF_ADD:
 		/* lock *(u32/u64*)(dst_reg + off) <op>= src_reg */
 		EMIT1(simple_alu_opcodes[atomic_op]);
@@ -1306,8 +1307,19 @@ st:			if (is_imm8(insn->off))
 
 		case BPF_STX | BPF_ATOMIC | BPF_W:
 		case BPF_STX | BPF_ATOMIC | BPF_DW:
-			err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
-					  insn->off, BPF_SIZE(insn->code));
+			if (insn->imm == (BPF_SUB | BPF_FETCH)) {
+				/*
+				 * x86 doesn't have an XSUB insn, so we negate
+				 * and XADD instead.
+				 */
+				emit_neg(&prog, src_reg, BPF_SIZE(insn->code) == BPF_DW);
+				err = emit_atomic(&prog, BPF_ADD | BPF_FETCH,
+						  dst_reg, src_reg, insn->off,
+						  BPF_SIZE(insn->code));
+			} else {
+				err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
+						  insn->off, BPF_SIZE(insn->code));
+			}
 			if (err)
 				return err;
 			break;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6186280715ed..a20a3a536bf5 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -280,6 +280,26 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
 		.off   = OFF,					\
 		.imm   = BPF_ADD | BPF_FETCH })
 
+/* Atomic memory sub, *(uint *)(dst_reg + off16) -= src_reg */
+
+#define BPF_ATOMIC_SUB(SIZE, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = BPF_SUB })
+
+/* Atomic memory sub with fetch, src_reg = atomic_fetch_sub(*(dst_reg + off), src_reg); */
+
+#define BPF_ATOMIC_FETCH_SUB(SIZE, DST, SRC, OFF)		\
+	((struct bpf_insn) {					\
+		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = BPF_SUB | BPF_FETCH })
+
 /* Atomic exchange, src_reg = atomic_xchg((dst_reg + off), src_reg) */
 
 #define BPF_ATOMIC_XCHG(SIZE, DST, SRC, OFF)			\
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 20a5351d1dc2..0f700464955f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1650,6 +1650,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
 	STX_ATOMIC_W:
 		switch (IMM) {
 		ATOMIC(BPF_ADD, add)
+		ATOMIC(BPF_SUB, sub)
 
 		case BPF_XCHG:
 			if (BPF_SIZE(insn->code) == BPF_W)
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 3441ac54ac65..f33acffdeed0 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -80,6 +80,11 @@ const char *const bpf_alu_string[16] = {
 	[BPF_END >> 4]  = "endian",
 };
 
+const char *const bpf_atomic_alu_string[16] = {
+	[BPF_ADD >> 4]  = "add",
+	[BPF_SUB >> 4]  = "sub",
+};
+
 static const char *const bpf_ldst_string[] = {
 	[BPF_W >> 3]  = "u32",
 	[BPF_H >> 3]  = "u16",
@@ -154,17 +159,20 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 				insn->dst_reg,
 				insn->off, insn->src_reg);
 		else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
-			 insn->imm == BPF_ADD) {
-			verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) += r%d\n",
+			 (insn->imm == BPF_ADD || insn->imm == BPF_SUB)) {
+			verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) %s r%d\n",
 				insn->code,
 				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 				insn->dst_reg, insn->off,
+				bpf_alu_string[BPF_OP(insn->imm) >> 4],
 				insn->src_reg);
 		} else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
-			   insn->imm == (BPF_ADD | BPF_FETCH)) {
-			verbose(cbs->private_data, "(%02x) r%d = atomic%s_fetch_add(*(%s *)(r%d %+d), r%d)\n",
+			   (insn->imm == (BPF_ADD | BPF_FETCH) ||
+			    insn->imm == (BPF_SUB | BPF_FETCH))) {
+			verbose(cbs->private_data, "(%02x) r%d = atomic%s_fetch_%s(*(%s *)(r%d %+d), r%d)\n",
 				insn->code, insn->src_reg,
 				BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
+				bpf_atomic_alu_string[BPF_OP(insn->imm) >> 4],
 				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 				insn->dst_reg, insn->off, insn->src_reg);
 		} else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c8311cc114ec..dea9ad486ad1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3606,6 +3606,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
 	switch (insn->imm) {
 	case BPF_ADD:
 	case BPF_ADD | BPF_FETCH:
+	case BPF_SUB:
+	case BPF_SUB | BPF_FETCH:
 	case BPF_XCHG:
 	case BPF_CMPXCHG:
 		break;
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index ea99bd17d003..387eddaf11e5 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -190,6 +190,26 @@
 		.off   = OFF,					\
 		.imm   = BPF_ADD | BPF_FETCH })
 
+/* Atomic memory sub, *(uint *)(dst_reg + off16) -= src_reg */
+
+#define BPF_ATOMIC_SUB(SIZE, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = BPF_SUB })
+
+/* Atomic memory sub with fetch, src_reg = atomic_fetch_sub(*(dst_reg + off), src_reg); */
+
+#define BPF_ATOMIC_FETCH_SUB(SIZE, DST, SRC, OFF)		\
+	((struct bpf_insn) {					\
+		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = BPF_SUB | BPF_FETCH })
+
 /* Atomic exchange, src_reg = atomic_xchg((dst_reg + off), src_reg) */
 
 #define BPF_ATOMIC_XCHG(SIZE, DST, SRC, OFF)			\