diff mbox series

[v2,RESEND] net/mlx5e: Avoid field-overflowing memcpy()

Message ID 20220124172028.2410761-1-keescook@chromium.org (mailing list archive)
State Not Applicable
Headers show
Series [v2,RESEND] net/mlx5e: Avoid field-overflowing memcpy() | expand

Commit Message

Kees Cook Jan. 24, 2022, 5:20 p.m. UTC
In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields.

Use flexible arrays instead of zero-element arrays (which look like they
are always overflowing) and split the cross-field memcpy() into two halves
that can be appropriately bounds-checked by the compiler.

We were doing:

	#define ETH_HLEN  14
	#define VLAN_HLEN  4
	...
	#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
	...
        struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
	...
        struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
        struct mlx5_wqe_data_seg *dseg = wqe->data;
	...
	memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);

The target is wqe->eth.inline_hdr.start (which the compiler sees as being
2 bytes in size), but the memcpy() copies 18 bytes, intentionally writing
past the end of start (which shares its 2 bytes with insert.vlan_tci in
the union). The remaining 16 bytes get written into wqe->data[0], covering
byte_count (4 bytes), lkey (4 bytes), and addr (8 bytes).

struct mlx5e_tx_wqe {
        struct mlx5_wqe_ctrl_seg   ctrl;                 /*     0    16 */
        struct mlx5_wqe_eth_seg    eth;                  /*    16    16 */
        struct mlx5_wqe_data_seg   data[];               /*    32     0 */

        /* size: 32, cachelines: 1, members: 3 */
        /* last cacheline: 32 bytes */
};

struct mlx5_wqe_eth_seg {
        u8                         swp_outer_l4_offset;  /*     0     1 */
        u8                         swp_outer_l3_offset;  /*     1     1 */
        u8                         swp_inner_l4_offset;  /*     2     1 */
        u8                         swp_inner_l3_offset;  /*     3     1 */
        u8                         cs_flags;             /*     4     1 */
        u8                         swp_flags;            /*     5     1 */
        __be16                     mss;                  /*     6     2 */
        __be32                     flow_table_metadata;  /*     8     4 */
        union {
                struct {
                        __be16     sz;                   /*    12     2 */
                        u8         start[2];             /*    14     2 */
                } inline_hdr;                            /*    12     4 */
                struct {
                        __be16     type;                 /*    12     2 */
                        __be16     vlan_tci;             /*    14     2 */
                } insert;                                /*    12     4 */
                __be32             trailer;              /*    12     4 */
        };                                               /*    12     4 */

        /* size: 16, cachelines: 1, members: 9 */
        /* last cacheline: 16 bytes */
};

struct mlx5_wqe_data_seg {
        __be32                     byte_count;           /*     0     4 */
        __be32                     lkey;                 /*     4     4 */
        __be64                     addr;                 /*     8     8 */

        /* size: 16, cachelines: 1, members: 3 */
        /* last cacheline: 16 bytes */
};

So, split the memcpy() so the compiler can reason about the buffer
sizes.

"pahole" shows no size nor member offset changes to struct mlx5e_tx_wqe
nor struct mlx5e_umr_wqe. "objdump -d" shows no meaningful object
code changes (i.e., only source-line-number-induced differences and
optimizations).

Cc: Saeed Mahameed <saeedm@nvidia.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jesper Dangaard Brouer <hawk@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-rdma@vger.kernel.org
Cc: bpf@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
Since this results in no binary differences, I will carry this in my tree
unless someone else wants to pick it up. It's one of the last remaining
clean-ups needed for the next step in memcpy() hardening.
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h     | 6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

Comments

Saeed Mahameed Jan. 26, 2022, 9:28 p.m. UTC | #1
On 24 Jan 09:20, Kees Cook wrote:
>In preparation for FORTIFY_SOURCE performing compile-time and run-time
>field bounds checking for memcpy(), memmove(), and memset(), avoid
>intentionally writing across neighboring fields.
>
>Use flexible arrays instead of zero-element arrays (which look like they
>are always overflowing) and split the cross-field memcpy() into two halves
>that can be appropriately bounds-checked by the compiler.
>
>We were doing:
>
>	#define ETH_HLEN  14
>	#define VLAN_HLEN  4
>	...
>	#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
>	...
>        struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
>	...
>        struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
>        struct mlx5_wqe_data_seg *dseg = wqe->data;
>	...
>	memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
>
>target is wqe->eth.inline_hdr.start (which the compiler sees as being
>2 bytes in size), but copying 18, intending to write across start
>(really vlan_tci, 2 bytes). The remaining 16 bytes get written into
>wqe->data[0], covering byte_count (4 bytes), lkey (4 bytes), and addr
>(8 bytes).
>
>struct mlx5e_tx_wqe {
>        struct mlx5_wqe_ctrl_seg   ctrl;                 /*     0    16 */
>        struct mlx5_wqe_eth_seg    eth;                  /*    16    16 */
>        struct mlx5_wqe_data_seg   data[];               /*    32     0 */
>
>        /* size: 32, cachelines: 1, members: 3 */
>        /* last cacheline: 32 bytes */
>};
>
>struct mlx5_wqe_eth_seg {
>        u8                         swp_outer_l4_offset;  /*     0     1 */
>        u8                         swp_outer_l3_offset;  /*     1     1 */
>        u8                         swp_inner_l4_offset;  /*     2     1 */
>        u8                         swp_inner_l3_offset;  /*     3     1 */
>        u8                         cs_flags;             /*     4     1 */
>        u8                         swp_flags;            /*     5     1 */
>        __be16                     mss;                  /*     6     2 */
>        __be32                     flow_table_metadata;  /*     8     4 */
>        union {
>                struct {
>                        __be16     sz;                   /*    12     2 */
>                        u8         start[2];             /*    14     2 */
>                } inline_hdr;                            /*    12     4 */
>                struct {
>                        __be16     type;                 /*    12     2 */
>                        __be16     vlan_tci;             /*    14     2 */
>                } insert;                                /*    12     4 */
>                __be32             trailer;              /*    12     4 */
>        };                                               /*    12     4 */
>
>        /* size: 16, cachelines: 1, members: 9 */
>        /* last cacheline: 16 bytes */
>};
>
>struct mlx5_wqe_data_seg {
>        __be32                     byte_count;           /*     0     4 */
>        __be32                     lkey;                 /*     4     4 */
>        __be64                     addr;                 /*     8     8 */
>
>        /* size: 16, cachelines: 1, members: 3 */
>        /* last cacheline: 16 bytes */
>};
>
>So, split the memcpy() so the compiler can reason about the buffer
>sizes.
>
>"pahole" shows no size nor member offset changes to struct mlx5e_tx_wqe
>nor struct mlx5e_umr_wqe. "objdump -d" shows no meaningful object
>code changes (i.e. only source line number induced differences and
>optimizations).
>
>Cc: Saeed Mahameed <saeedm@nvidia.com>
>Cc: Leon Romanovsky <leon@kernel.org>
>Cc: "David S. Miller" <davem@davemloft.net>
>Cc: Jakub Kicinski <kuba@kernel.org>
>Cc: Alexei Starovoitov <ast@kernel.org>
>Cc: Daniel Borkmann <daniel@iogearbox.net>
>Cc: Jesper Dangaard Brouer <hawk@kernel.org>
>Cc: John Fastabend <john.fastabend@gmail.com>
>Cc: netdev@vger.kernel.org
>Cc: linux-rdma@vger.kernel.org
>Cc: bpf@vger.kernel.org
>Signed-off-by: Kees Cook <keescook@chromium.org>
>---
>Since this results in no binary differences, I will carry this in my tree
>unless someone else wants to pick it up. It's one of the last remaining
>clean-ups needed for the next step in memcpy() hardening.

applied to net-next-mlx5.

Thanks,
Saeed
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 812e6810cb3b..c14e06ca64d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -224,7 +224,7 @@  static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
 struct mlx5e_tx_wqe {
 	struct mlx5_wqe_ctrl_seg ctrl;
 	struct mlx5_wqe_eth_seg  eth;
-	struct mlx5_wqe_data_seg data[0];
+	struct mlx5_wqe_data_seg data[];
 };
 
 struct mlx5e_rx_wqe_ll {
@@ -241,8 +241,8 @@  struct mlx5e_umr_wqe {
 	struct mlx5_wqe_umr_ctrl_seg   uctrl;
 	struct mlx5_mkey_seg           mkc;
 	union {
-		struct mlx5_mtt inline_mtts[0];
-		struct mlx5_klm inline_klms[0];
+		DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts);
+		DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms);
 	};
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 338d65e2c9ce..56e10c84a706 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -341,8 +341,10 @@  mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
 
 	/* copy the inline part if required */
 	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
-		memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
+		memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
 		eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+		memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
+		       MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
 		dma_len  -= MLX5E_XDP_MIN_INLINE;
 		dma_addr += MLX5E_XDP_MIN_INLINE;
 		dseg++;