diff mbox series

[RFC,V4,net-next,1/5] ethtool: Allow network drivers to dump arbitrary EEPROM data

Message ID 1616433075-27051-2-git-send-email-moshe@nvidia.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series ethtool: Extend module EEPROM dump API | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 12 maintainers not CCed: linux-api@vger.kernel.org amitc@mellanox.com johannes.berg@intel.com gustavo@embeddedor.com linux@rempel-privat.de linux-doc@vger.kernel.org corbet@lwn.net danieller@nvidia.com f.fainelli@gmail.com mchehab+huawei@kernel.org irusskikh@marvell.com alexanderduyck@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 3075 this patch: 3075
netdev/kdoc fail Errors and warnings before: 16 this patch: 17
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 3193 this patch: 3193
netdev/header_inline success Link

Commit Message

Moshe Shemesh March 22, 2021, 5:11 p.m. UTC
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>

Define get_module_eeprom_by_page() ethtool callback and implement
netlink infrastructure.

get_module_eeprom_by_page() allows network drivers to dump a part of
module's EEPROM specified by page and bank numbers along with offset and
length. It is effectively a netlink replacement for get_module_info()
and get_module_eeprom() pair, which is needed due to emergence of
complex non-linear EEPROM layouts.

Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
---
 Documentation/networking/ethtool-netlink.rst |  32 +++-
 include/linux/ethtool.h                      |  33 +++-
 include/uapi/linux/ethtool_netlink.h         |  19 +++
 net/ethtool/Makefile                         |   2 +-
 net/ethtool/eeprom.c                         | 161 +++++++++++++++++++
 net/ethtool/netlink.c                        |  10 ++
 net/ethtool/netlink.h                        |   2 +
 7 files changed, 255 insertions(+), 4 deletions(-)
 create mode 100644 net/ethtool/eeprom.c

Comments

Don Bollinger March 22, 2021, 6:17 p.m. UTC | #1
> From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
> 
> Define get_module_eeprom_by_page() ethtool callback and implement
> netlink infrastructure.
> 
> get_module_eeprom_by_page() allows network drivers to dump a part of
> module's EEPROM specified by page and bank numbers along with offset and
> length. It is effectively a netlink replacement for get_module_info() and
> get_module_eeprom() pair, which is needed due to emergence of complex
> non-linear EEPROM layouts.
> 
> Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
> ---
>  Documentation/networking/ethtool-netlink.rst |  32 +++-
>  include/linux/ethtool.h                      |  33 +++-
>  include/uapi/linux/ethtool_netlink.h         |  19 +++
>  net/ethtool/Makefile                         |   2 +-
>  net/ethtool/eeprom.c                         | 161 +++++++++++++++++++
>  net/ethtool/netlink.c                        |  10 ++
>  net/ethtool/netlink.h                        |   2 +
>  7 files changed, 255 insertions(+), 4 deletions(-)  create mode 100644
> net/ethtool/eeprom.c
> 
> diff --git a/Documentation/networking/ethtool-netlink.rst
> b/Documentation/networking/ethtool-netlink.rst
> index 05073482db05..b370d1186ceb 100644
> --- a/Documentation/networking/ethtool-netlink.rst
> +++ b/Documentation/networking/ethtool-netlink.rst
> @@ -1280,6 +1280,34 @@ Kernel response contents:
>  For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES``
> indicates that  the table contains static entries, hard-coded by the NIC.
> 
> +MODULE_EEPROM
> +===========
> +
> +Fetch module EEPROM data dump.
> +
> +Request contents:
> +
> +  =======================================  ======
> ==========================
> +  ``ETHTOOL_A_MODULE_EEPROM_HEADER``       nested  request header
> +  ``ETHTOOL_A_MODULE_EEPROM_OFFSET``       u32     offset within a page
> +  ``ETHTOOL_A_MODULE_EEPROM_LENGTH``       u32     amount of bytes to
> read
> +  ``ETHTOOL_A_MODULE_EEPROM_PAGE``         u8      page number
> +  ``ETHTOOL_A_MODULE_EEPROM_BANK``         u8      bank number
> +  ``ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS``  u8      page I2C address
> +  =======================================  ======
> + ==========================
> +
> +Kernel response contents:
> +
> +
+---------------------------------------------+--------+--------------------
-+
> + | ``ETHTOOL_A_MODULE_EEPROM_HEADER``          | nested | reply header
> |
> +
+---------------------------------------------+--------+--------------------
-+
> + | ``ETHTOOL_A_MODULE_EEPROM_DATA``            | nested | array of bytes
> from |
> + |                                             |        | module EEPROM
|
> +
+---------------------------------------------+--------+--------------------
-+
> +
> +``ETHTOOL_A_MODULE_EEPROM_DATA`` has an attribute length equal to
> the
> +amount of bytes driver actually read.
> +
>  Request translation
>  ===================
> 
> @@ -1357,8 +1385,8 @@ are netlink only.
>    ``ETHTOOL_GET_DUMP_FLAG``           n/a
>    ``ETHTOOL_GET_DUMP_DATA``           n/a
>    ``ETHTOOL_GET_TS_INFO``             ``ETHTOOL_MSG_TSINFO_GET``
> -  ``ETHTOOL_GMODULEINFO``             n/a
> -  ``ETHTOOL_GMODULEEEPROM``           n/a
> +  ``ETHTOOL_GMODULEINFO``
> ``ETHTOOL_MSG_MODULE_EEPROM_GET``
> +  ``ETHTOOL_GMODULEEEPROM``
> ``ETHTOOL_MSG_MODULE_EEPROM_GET``
>    ``ETHTOOL_GEEE``                    ``ETHTOOL_MSG_EEE_GET``
>    ``ETHTOOL_SEEE``                    ``ETHTOOL_MSG_EEE_SET``
>    ``ETHTOOL_GRSSH``                   n/a
> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index
> ec4cd3921c67..2f6df3b75dd6 100644
> --- a/include/linux/ethtool.h
> +++ b/include/linux/ethtool.h
> @@ -81,6 +81,7 @@ enum {
>  #define ETH_RSS_HASH_NO_CHANGE	0
> 
>  struct net_device;
> +struct netlink_ext_ack;
> 
>  /* Some generic methods drivers may use in their ethtool_ops */
>  u32 ethtool_op_get_link(struct net_device *dev); @@ -265,6 +266,31 @@
> struct ethtool_pause_stats {
>  	u64 rx_pause_frames;
>  };
> 
> +#define ETH_MODULE_EEPROM_PAGE_LEN	256

Sorry to keep raising issues, but I think you want to make this constant
128.  The page register swaps in different pages, each 128 bytes long, into
the full 256 byte address range of the device.  If you check, you will find
you use ETH_MODULE_EEPROM_PAGE_LEN/2 most of the time.  More important, note
below, your #define MODULE_EEPROM_MAX_OFFSET  is twice as large as it should
be because you count each page as 256 bytes.

If you prefer to call the whole 256 bytes a page, I'm OK with that.  You'll
need to change the MAX_OFFSET constant as noted below.

> +#define ETH_MODULE_MAX_I2C_ADDRESS	0x7f
> +
> +/**
> + * struct ethtool_eeprom_data - EEPROM dump from specified page
> + * @offset: Offset within the specified EEPROM page to begin read, in
> bytes.
> + * @length: Number of bytes to read.
> + * @page: Page number to read from.
> + * @bank: Page bank number to read from, if applicable by EEPROM spec.
> + * @i2c_address: I2C address of a page. Value less than 0x7f expected.
> Most
> + *	EEPROMs use 0x50 or 0x51.
> + * @data: Pointer to buffer with EEPROM data of @length size.
> + *
> + * This can be used to manage pages during EEPROM dump in ethtool and
> +pass
> + * required information to the driver.
> + */
> +struct ethtool_module_eeprom {
> +	__u32	offset;
> +	__u32	length;
> +	__u8	page;
> +	__u8	bank;
> +	__u8	i2c_address;
> +	__u8	*data;
> +};
> +
>  /**
>   * struct ethtool_ops - optional netdev operations
>   * @cap_link_lanes_supported: indicates if the driver supports lanes @@ -
> 410,6 +436,9 @@ struct ethtool_pause_stats {
>   * @get_ethtool_phy_stats: Return extended statistics about the PHY
> device.
>   *	This is only useful if the device maintains PHY statistics and
>   *	cannot use the standard PHY library helpers.
> + * @get_module_eeprom_by_page: Get a region of plug-in module
> EEPROM data from
> + *	specified page. Returns a negative error code or the amount of bytes
> + *	read.
>   *
>   * All operations are optional (i.e. the function pointer may be set
>   * to %NULL) and callers must take this into account.  Callers must @@
-515,6
> +544,9 @@ struct ethtool_ops {
>  				   const struct ethtool_tunable *, void *);
>  	int	(*set_phy_tunable)(struct net_device *,
>  				   const struct ethtool_tunable *, const
void
> *);
> +	int	(*get_module_eeprom_by_page)(struct net_device *dev,
> +					     const struct
> ethtool_module_eeprom *page,
> +					     struct netlink_ext_ack
*extack);
>  };
> 
>  int ethtool_check_ops(const struct ethtool_ops *ops); @@ -538,7 +570,6
> @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
>  				       const struct ethtool_link_ksettings
*cmd,
>  				       u32 *dev_speed, u8 *dev_duplex);
> 
> -struct netlink_ext_ack;
>  struct phy_device;
>  struct phy_tdr_config;
> 
> diff --git a/include/uapi/linux/ethtool_netlink.h
> b/include/uapi/linux/ethtool_netlink.h
> index a286635ac9b8..c99dea5afbb2 100644
> --- a/include/uapi/linux/ethtool_netlink.h
> +++ b/include/uapi/linux/ethtool_netlink.h
> @@ -42,6 +42,7 @@ enum {
>  	ETHTOOL_MSG_CABLE_TEST_ACT,
>  	ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
>  	ETHTOOL_MSG_TUNNEL_INFO_GET,
> +	ETHTOOL_MSG_MODULE_EEPROM_GET,
> 
>  	/* add new constants above here */
>  	__ETHTOOL_MSG_USER_CNT,
> @@ -80,6 +81,7 @@ enum {
>  	ETHTOOL_MSG_CABLE_TEST_NTF,
>  	ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
>  	ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
> +	ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
> 
>  	/* add new constants above here */
>  	__ETHTOOL_MSG_KERNEL_CNT,
> @@ -629,6 +631,23 @@ enum {
>  	ETHTOOL_A_TUNNEL_INFO_MAX =
> (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)  };
> 
> +/* MODULE EEPROM */
> +
> +enum {
> +	ETHTOOL_A_MODULE_EEPROM_UNSPEC,
> +	ETHTOOL_A_MODULE_EEPROM_HEADER,			/*
> nest - _A_HEADER_* */
> +
> +	ETHTOOL_A_MODULE_EEPROM_OFFSET,			/*
> u32 */
> +	ETHTOOL_A_MODULE_EEPROM_LENGTH,			/*
> u32 */
> +	ETHTOOL_A_MODULE_EEPROM_PAGE,			/* u8
> */
> +	ETHTOOL_A_MODULE_EEPROM_BANK,			/* u8
> */
> +	ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS,		/* u8
> */
> +	ETHTOOL_A_MODULE_EEPROM_DATA,			/*
> nested */
> +
> +	__ETHTOOL_A_MODULE_EEPROM_CNT,
> +	ETHTOOL_A_MODULE_EEPROM_MAX =
> (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) };
> +
>  /* generic netlink info */
>  #define ETHTOOL_GENL_NAME "ethtool"
>  #define ETHTOOL_GENL_VERSION 1
> diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index
> 7a849ff22dad..d604346bc074 100644
> --- a/net/ethtool/Makefile
> +++ b/net/ethtool/Makefile
> @@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK)	+= ethtool_nl.o
>  ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
>  		   linkstate.o debug.o wol.o features.o privflags.o rings.o
\
>  		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o
\
> -		   tunnels.o
> +		   tunnels.o eeprom.o
> diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c new file mode
> 100644 index 000000000000..79d75e0e2391
> --- /dev/null
> +++ b/net/ethtool/eeprom.c
> @@ -0,0 +1,161 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include <linux/ethtool.h>
> +#include "netlink.h"
> +#include "common.h"
> +
> +struct eeprom_req_info {
> +	struct ethnl_req_info	base;
> +	u32			offset;
> +	u32			length;
> +	u8			page;
> +	u8			bank;
> +	u8			i2c_address;
> +};
> +
> +struct eeprom_reply_data {
> +	struct ethnl_reply_data base;
> +	u32			length;
> +	u8			*data;
> +};
> +
> +/* Module EEPROMs use one byte to store page number, while each page
> is
> +256
> + * bytes long, so in principle offset is limited by a multiple of that,
> + * plus the length of an additional SFP page located at high I2C address.
> + */
> +#define MODULE_EEPROM_MAX_OFFSET (257 *
> ETH_MODULE_EEPROM_PAGE_LEN)

The device actually has 257 addressable chunks of 128 bytes each.  With
ETH_MODULE_EEPROM_PAGE_LEN set to 256, your constant is 2X too big.

Note also, SFP devices (but not QSFP or CMIS) actually have another 256
bytes available at 0x50, in addition to the full 257*128 at 0x51.  So SFP is
actually 259*128 or (256 + 257 * 128).

Devices that don't support pages have much lower limits (256 bytes for
QSFP/CMIS and 512 for SFP).  Some SFP only support 256 bytes.  Most devices
will return nonsense for pages above 3.  So, this check is really only an
absolute limit.  The SFP driver that takes this request will probably check
against a more refined MAX length (eg modinfo->eeprom_len).

I suggest setting this constant to 259 * (ETH_MODULE_EEPROM_PAGE_LEN / 2).
Let the driver refine it from there.

> +
> +#define MODULE_EEPROM_REQINFO(__req_base) \
> +	container_of(__req_base, struct eeprom_req_info, base)
> +
> +#define MODULE_EEPROM_REPDATA(__reply_base) \
> +	container_of(__reply_base, struct eeprom_reply_data, base)
> +
> +static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
> +			       struct ethnl_reply_data *reply_base,
> +			       struct genl_info *info)
> +{
> +	struct eeprom_reply_data *reply =
> MODULE_EEPROM_REPDATA(reply_base);
> +	struct eeprom_req_info *request =
> MODULE_EEPROM_REQINFO(req_base);
> +	struct ethtool_module_eeprom page_data = {0};
> +	struct net_device *dev = reply_base->dev;
> +	int ret;
> +
> +	if (!dev->ethtool_ops->get_module_eeprom_by_page)
> +		return -EOPNOTSUPP;
> +
> +	/* Allow dumps either of low or high page without crossing half page
> boundary */
> +	if ((request->offset < ETH_MODULE_EEPROM_PAGE_LEN / 2 &&
> +	     request->offset + request->length >
> ETH_MODULE_EEPROM_PAGE_LEN / 2) ||
> +	    request->offset + request->length >
> ETH_MODULE_EEPROM_PAGE_LEN)
> +		return -EINVAL;
> +
> +	page_data.offset = request->offset;
> +	page_data.length = request->length;
> +	page_data.i2c_address = request->i2c_address;
> +	page_data.page = request->page;
> +	page_data.bank = request->bank;
> +	page_data.data = kmalloc(page_data.length, GFP_KERNEL);
> +	if (!page_data.data)
> +		return -ENOMEM;
> +
> +	ret = ethnl_ops_begin(dev);
> +	if (ret)
> +		goto err_free;
> +
> +	ret = dev->ethtool_ops->get_module_eeprom_by_page(dev,
> &page_data,
> +							  info->extack);
> +	if (ret < 0)
> +		goto err_ops;
> +
> +	reply->length = ret;
> +	reply->data = page_data.data;
> +
> +	ethnl_ops_complete(dev);
> +	return 0;
> +
> +err_ops:
> +	ethnl_ops_complete(dev);
> +err_free:
> +	kfree(page_data.data);
> +	return ret;
> +}
> +
> +static int eeprom_parse_request(struct ethnl_req_info *req_info, struct
> nlattr **tb,
> +				struct netlink_ext_ack *extack)
> +{
> +	struct eeprom_req_info *request =
> MODULE_EEPROM_REQINFO(req_info);
> +
> +	if (!tb[ETHTOOL_A_MODULE_EEPROM_OFFSET] ||
> +	    !tb[ETHTOOL_A_MODULE_EEPROM_LENGTH] ||
> +	    !tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS])
> +		return -EINVAL;
> +
> +	request->i2c_address =
> nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]);
> +	request->offset =
> nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_OFFSET]);
> +	request->length =
> nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_LENGTH]);
> +
> +	if (request->offset > MODULE_EEPROM_MAX_OFFSET)
> +		return -EINVAL;
> +
> +	if (tb[ETHTOOL_A_MODULE_EEPROM_PAGE]) {
> +		request->page =
> nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_PAGE]);
> +		if (request->page > 0 &&
> +		    (request->offset < ETH_MODULE_EEPROM_PAGE_LEN / 2
> ||
> +		     request->offset + request->length >
> ETH_MODULE_EEPROM_PAGE_LEN))
> +			return -EINVAL;
> +	}
> +	if (tb[ETHTOOL_A_MODULE_EEPROM_BANK])
> +		request->bank =
> nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_BANK]);
> +
> +	return 0;
> +}
> +
> +static int eeprom_reply_size(const struct ethnl_req_info *req_base,
> +			     const struct ethnl_reply_data *reply_base) {
> +	const struct eeprom_req_info *request =
> +MODULE_EEPROM_REQINFO(req_base);
> +
> +	return nla_total_size(sizeof(u8) * request->length); /*
> _EEPROM_DATA
> +*/ }
> +
> +static int eeprom_fill_reply(struct sk_buff *skb,
> +			     const struct ethnl_req_info *req_base,
> +			     const struct ethnl_reply_data *reply_base) {
> +	struct eeprom_reply_data *reply =
> MODULE_EEPROM_REPDATA(reply_base);
> +
> +	return nla_put(skb, ETHTOOL_A_MODULE_EEPROM_DATA, reply-
> >length,
> +reply->data); }
> +
> +static void eeprom_cleanup_data(struct ethnl_reply_data *reply_base) {
> +	struct eeprom_reply_data *reply =
> MODULE_EEPROM_REPDATA(reply_base);
> +
> +	kfree(reply->data);
> +}
> +
> +const struct ethnl_request_ops ethnl_module_eeprom_request_ops = {
> +	.request_cmd		= ETHTOOL_MSG_MODULE_EEPROM_GET,
> +	.reply_cmd		=
> ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
> +	.hdr_attr		= ETHTOOL_A_MODULE_EEPROM_HEADER,
> +	.req_info_size		= sizeof(struct eeprom_req_info),
> +	.reply_data_size	= sizeof(struct eeprom_reply_data),
> +
> +	.parse_request		= eeprom_parse_request,
> +	.prepare_data		= eeprom_prepare_data,
> +	.reply_size		= eeprom_reply_size,
> +	.fill_reply		= eeprom_fill_reply,
> +	.cleanup_data		= eeprom_cleanup_data,
> +};
> +
> +const struct nla_policy ethnl_module_eeprom_get_policy[] = {
> +	[ETHTOOL_A_MODULE_EEPROM_HEADER]	=
> NLA_POLICY_NESTED(ethnl_header_policy),
> +	[ETHTOOL_A_MODULE_EEPROM_OFFSET]	= { .type = NLA_U32 },
> +	[ETHTOOL_A_MODULE_EEPROM_LENGTH]	= { .type = NLA_U32 },
> +	[ETHTOOL_A_MODULE_EEPROM_PAGE]		= { .type =
> NLA_U8 },
> +	[ETHTOOL_A_MODULE_EEPROM_BANK]		= { .type =
> NLA_U8 },
> +	[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]	=
> +		NLA_POLICY_RANGE(NLA_U8, 0,
> ETH_MODULE_MAX_I2C_ADDRESS), };
> +
> diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index
> 50d3c8896f91..52d1ffb1bca2 100644
> --- a/net/ethtool/netlink.c
> +++ b/net/ethtool/netlink.c
> @@ -245,6 +245,7 @@
> ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
>  	[ETHTOOL_MSG_PAUSE_GET]		=
> &ethnl_pause_request_ops,
>  	[ETHTOOL_MSG_EEE_GET]		= &ethnl_eee_request_ops,
>  	[ETHTOOL_MSG_TSINFO_GET]	= &ethnl_tsinfo_request_ops,
> +	[ETHTOOL_MSG_MODULE_EEPROM_GET]	=
> &ethnl_module_eeprom_request_ops,
>  };
> 
>  static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback
> *cb) @@ -912,6 +913,15 @@ static const struct genl_ops ethtool_genl_ops[]
> = {
>  		.policy = ethnl_tunnel_info_get_policy,
>  		.maxattr = ARRAY_SIZE(ethnl_tunnel_info_get_policy) - 1,
>  	},
> +	{
> +		.cmd	= ETHTOOL_MSG_MODULE_EEPROM_GET,
> +		.doit	= ethnl_default_doit,
> +		.start	= ethnl_default_start,
> +		.dumpit	= ethnl_default_dumpit,
> +		.done	= ethnl_default_done,
> +		.policy = ethnl_module_eeprom_get_policy,
> +		.maxattr = ARRAY_SIZE(ethnl_module_eeprom_get_policy) -
> 1,
> +	},
>  };
> 
>  static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff
--git
> a/net/ethtool/netlink.h b/net/ethtool/netlink.h index
> 6eabd58d81bf..71cbd9df8229 100644
> --- a/net/ethtool/netlink.h
> +++ b/net/ethtool/netlink.h
> @@ -344,6 +344,7 @@ extern const struct ethnl_request_ops
> ethnl_coalesce_request_ops;  extern const struct ethnl_request_ops
> ethnl_pause_request_ops;  extern const struct ethnl_request_ops
> ethnl_eee_request_ops;  extern const struct ethnl_request_ops
> ethnl_tsinfo_request_ops;
> +extern const struct ethnl_request_ops
> ethnl_module_eeprom_request_ops;
> 
>  extern const struct nla_policy
> ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];  extern const struct
> nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@
> -375,6 +376,7 @@ extern const struct nla_policy
> ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER +  extern const struct
> nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER +
> 1];  extern const struct nla_policy
> ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1];
> extern const struct nla_policy
> ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
> +extern const struct nla_policy
> +ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_DATA
> + 1];
> 
>  int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);  int
> ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
> --
> 2.18.2
Andrew Lunn March 23, 2021, 12:27 a.m. UTC | #2
> > +#define ETH_MODULE_EEPROM_PAGE_LEN	256
> 
> Sorry to keep raising issues, but I think you want to make this constant
> 128.

Yes, i also think the KAPI should be limited to returning a maximum of
a 1/2 page per call.

> > +#define MODULE_EEPROM_MAX_OFFSET (257 *
> > ETH_MODULE_EEPROM_PAGE_LEN)
> 
> The device actually has 257 addressable chunks of 128 bytes each.  With
> ETH_MODULE_EEPROM_PAGE_LEN set to 256, your constant is 2X too big.
> 
> Note also, SFP devices (but not QSFP or CMIS) actually have another 256
> bytes available at 0x50, in addition to the full 257*128 at 0x51.  So SFP is
> actually 259*128 or (256 + 257 * 128).
> 
> Devices that don't support pages have much lower limits (256 bytes for
> QSFP/CMIS and 512 for SFP).  Some SFP only support 256 bytes.  Most devices
> will return nonsense for pages above 3.  So, this check is really only an
> absolute limit.  The SFP driver that takes this request will probably check
> against a more refined MAX length (eg modinfo->eeprom_len).
> 
> I suggest setting this constant to 259 * (ETH_MODULE_EEPROM_PAGE_LEN / 2).
> Let the driver refine it from there.

I don't even see a need for this. The offset should be within one 1/2
page, of one bank. So offset >= 0 and <= 127. Length is also > 0 and
<- 127. And offset+length is <= 127.

For the moment, please forget about backwards compatibility with the
IOCTL interface. Lets get a new clean KAPI and a new clean internal
API between the ethtool core and the drivers. Once we have that agreed
on, we can work on the various compatibility shims we need to work
between old and new APIs in various places.

      Andrew
Andrew Lunn March 23, 2021, 12:54 a.m. UTC | #3
> +static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
> +			       struct ethnl_reply_data *reply_base,
> +			       struct genl_info *info)
> +{
> +	struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base);
> +	struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base);
> +	struct ethtool_module_eeprom page_data = {0};
> +	struct net_device *dev = reply_base->dev;
> +	int ret;
> +
> +	if (!dev->ethtool_ops->get_module_eeprom_by_page)
> +		return -EOPNOTSUPP;
> +
> +	/* Allow dumps either of low or high page without crossing half page boundary */
> +	if ((request->offset < ETH_MODULE_EEPROM_PAGE_LEN / 2 &&
> +	     request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN / 2) ||
> +	    request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN)
> +		return -EINVAL;

Please keep all the parameter validation together, in
eeprom_parse_request(). At some point, we may extend
eeprom_parse_request() to make use of extack, to indicate which
parameter is invalid. Just getting an -EINVAL can be hard to debug,
where as NL_SET_ERR_MSG_ATTR() can help the user.

      Andrew
Don Bollinger March 23, 2021, 1:23 a.m. UTC | #4
> Subject: Re: [RFC PATCH V4 net-next 1/5] ethtool: Allow network drivers to
> dump arbitrary EEPROM data
> 
> > > +#define ETH_MODULE_EEPROM_PAGE_LEN	256
> >
> > Sorry to keep raising issues, but I think you want to make this
> > constant 128.
> 
> Yes, i also think the KAPI should be limited to returning a maximum of a
1/2
> page per call.
> 
> > > +#define MODULE_EEPROM_MAX_OFFSET (257 *
> > > ETH_MODULE_EEPROM_PAGE_LEN)
> >
> > The device actually has 257 addressable chunks of 128 bytes each.
> > With ETH_MODULE_EEPROM_PAGE_LEN set to 256, your constant is 2X too
> big.
> >
> > Note also, SFP devices (but not QSFP or CMIS) actually have another
> > 256 bytes available at 0x50, in addition to the full 257*128 at 0x51.
> > So SFP is actually 259*128 or (256 + 257 * 128).
> >
> > Devices that don't support pages have much lower limits (256 bytes for
> > QSFP/CMIS and 512 for SFP).  Some SFP only support 256 bytes.  Most
> > devices will return nonsense for pages above 3.  So, this check is
> > really only an absolute limit.  The SFP driver that takes this request
> > will probably check against a more refined MAX length (eg modinfo-
> >eeprom_len).
> >
> > I suggest setting this constant to 259 * (ETH_MODULE_EEPROM_PAGE_LEN
> / 2).
> > Let the driver refine it from there.
> 
> I don't even see a need for this. The offset should be within one 1/2
page, of
> one bank. So offset >= 0 and <= 127. Length is also > 0 and
> <- 127. And offset+length is <= 127.

I like the clean approach, but...   How do you request low memory?  If all
of the pages start at offset 0, then page 0 is at page 0, offset 0.  That
has always referred to low memory.  (I have actually used 'page -1' for this
in some code I don't share with others.)  I believe all of this code
currently is consistent with the paged area starting at offset 128.  If that
changes to start paged memory at offset 0, there are several places that
will need to be adjusted.

Whatever choice is made, there should be documentation scattered around in
the code, man pages and Documentation directories to tell the
user/programmer whether the paged area starts at offset 0 or offset 128.  It
is constantly confusing, and there is no obvious answer.

> 
> For the moment, please forget about backwards compatibility with the IOCTL
> interface. Lets get a new clean KAPI and a new clean internal API between
> the ethtool core and the drivers. Once we have that agreed on, we can work
> on the various compatibility shims we need to work between old and new
> APIs in various places.
> 
>       Andrew

Don
Andrew Lunn March 23, 2021, 2:03 a.m. UTC | #5
> > I don't even see a need for this. The offset should be within one 1/2
> page, of
> > one bank. So offset >= 0 and <= 127. Length is also > 0 and
> > <- 127. And offset+length is <= 127.
> 
> I like the clean approach, but...   How do you request low memory?

Duh!

I got my conditions wrong. Too focused on 1/2 pages to think that two
of them makes one page!

Lets try again:

offset < 256
0 < len < 128

if (offset < 128)
   offset + len < 128
else
   offset + len < 256

Does that look better?

Reading bytes from the lower 1/2 of page 0 should give the same data
as reading data from the lower 1/2 of page 42. So we can allow that,
but don't be too surprised when an SFP gets it wrong and gives you
rubbish. I would suggest ethtool(1) never actually does read from the
lower 1/2 of any page other than 0.

And i agree about documentation. I would suggest a comment in
ethtool_netlink.h, and the RST documentation.

		   Andrew
Don Bollinger March 23, 2021, 5:47 p.m. UTC | #6
> > > I don't even see a need for this. The offset should be within one
> > > 1/2
> > page, of
> > > one bank. So offset >= 0 and <= 127. Length is also > 0 and
> > > <- 127. And offset+length is <= 127.
> >
> > I like the clean approach, but...   How do you request low memory?
> 
> Duh!
> 
> I got my conditions wrong. Too focused on 1/2 pages to think that two of
> them makes one page!
> 
> Lets try again:
> 
> offset < 256
> 0 < len < 128

Actually 0 < len <= 128.  Length of 128 is not only legal, but very common.
"Read the whole 1/2 page block".

> 
> if (offset < 128)
>    offset + len < 128

Again, offset + len <= 128

> else
>    offset + len < 256

offset + len <= 256

> 
> Does that look better?
> 
> Reading bytes from the lower 1/2 of page 0 should give the same data as
> reading data from the lower 1/2 of page 42. So we can allow that, but
don't
> be too surprised when an SFP gets it wrong and gives you rubbish. I would

The spec is clear that the lower half is the same for all pages.  If the SFP
gives you rubbish you should throw the device in the rubbish.

> suggest ethtool(1) never actually does read from the lower 1/2 of any page
> other than 0.

I agree, despite my previous comment.  While the spec is clear that should
work, I believe virtually all such instances are bugs not yet discovered.

And, note that the legacy API provides no way to access lower memory from
any page but 0.  There's just no syntax for it.  Not that we care about
legacy :-).

> 
> And i agree about documentation. I would suggest a comment in
> ethtool_netlink.h, and the RST documentation.
> 
> 		   Andrew

Don
Andrew Lunn March 23, 2021, 10:17 p.m. UTC | #7
> The spec is clear that the lower half is the same for all pages.  If the SFP
> gives you rubbish you should throw the device in the rubbish.

Sometimes you don't get the choice. You have to use the GPON SFP the
ISP sent you, if you want FTTH. And they tend to be cheap and broken
with respect to the spec.

    Andrew
Moshe Shemesh March 24, 2021, 10:03 a.m. UTC | #8
On 3/23/2021 2:27 AM, Andrew Lunn wrote:
>
>>> +#define ETH_MODULE_EEPROM_PAGE_LEN 256
>> Sorry to keep raising issues, but I think you want to make this constant
>> 128.
> Yes, i also think the KAPI should be limited to returning a maximum of
> a 1/2 page per call.
OK.
>>> +#define MODULE_EEPROM_MAX_OFFSET (257 *
>>> ETH_MODULE_EEPROM_PAGE_LEN)
>> The device actually has 257 addressable chunks of 128 bytes each.  With
>> ETH_MODULE_EEPROM_PAGE_LEN set to 256, your constant is 2X too big.
>>
>> Note also, SFP devices (but not QSFP or CMIS) actually have another 256
>> bytes available at 0x50, in addition to the full 257*128 at 0x51.  So SFP is
>> actually 259*128 or (256 + 257 * 128).
>>
>> Devices that don't support pages have much lower limits (256 bytes for
>> QSFP/CMIS and 512 for SFP).  Some SFP only support 256 bytes.  Most devices
>> will return nonsense for pages above 3.  So, this check is really only an
>> absolute limit.  The SFP driver that takes this request will probably check
>> against a more refined MAX length (eg modinfo->eeprom_len).
>>
>> I suggest setting this constant to 259 * (ETH_MODULE_EEPROM_PAGE_LEN / 2).
>> Let the driver refine it from there.
> I don't even see a need for this. The offset should be within one 1/2
> page, of one bank. So offset >= 0 and <= 127. Length is also > 0 and
> <- 127. And offset+length is <= 127.
>
> For the moment, please forget about backwards compatibility with the
> IOCTL interface. Lets get a new clean KAPI and a new clean internal
> API between the ethtool core and the drivers. Once we have that agreed
> on, we can work on the various compatibility shims we need to work
> between old and new APIs in various places.


OK, so following the comments here, I will ignore backward compatibility 
of having global offset and length.

That means I assume in this KAPI I am asked to get data from specific 
page. Should I also require user space to send page number to this KAPI 
(I mean make page number mandatory) ?

>        Andrew
Moshe Shemesh March 24, 2021, 10:05 a.m. UTC | #9
On 3/23/2021 2:54 AM, Andrew Lunn wrote:
>
>> +static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
>> +                            struct ethnl_reply_data *reply_base,
>> +                            struct genl_info *info)
>> +{
>> +     struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base);
>> +     struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base);
>> +     struct ethtool_module_eeprom page_data = {0};
>> +     struct net_device *dev = reply_base->dev;
>> +     int ret;
>> +
>> +     if (!dev->ethtool_ops->get_module_eeprom_by_page)
>> +             return -EOPNOTSUPP;
>> +
>> +     /* Allow dumps either of low or high page without crossing half page boundary */
>> +     if ((request->offset < ETH_MODULE_EEPROM_PAGE_LEN / 2 &&
>> +          request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN / 2) ||
>> +         request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN)
>> +             return -EINVAL;
> Please keep all the parameter validation together, in
> eeprom_parse_request(). At some point, we may extend
> eeprom_parse_request() to make use of extack, to indicate which
> parameter is invalid. Just getting an -EINVAL can be hard to debug,
> where as NL_SET_ERR_MSG_ATTR() can help the user.
Sure, we can add that.
>        Andrew
Moshe Shemesh March 24, 2021, 10:14 a.m. UTC | #10
On 3/23/2021 7:47 PM, Don Bollinger wrote:
>>>> I don't even see a need for this. The offset should be within one
>>>> 1/2
>>> page, of
>>>> one bank. So offset >= 0 and <= 127. Length is also > 0 and
>>>> <- 127. And offset+length is <= 127.
>>> I like the clean approach, but...   How do you request low memory?
>> Duh!
>>
>> I got my conditions wrong. Too focused on 1/2 pages to think that two of
>> them makes one page!
>>
>> Lets try again:
>>
>> offset < 256
>> 0 < len < 128
> Actually 0 < len <= 128.  Length of 128 is not only legal, but very common.
> "Read the whole 1/2 page block".
Ack.
>> if (offset < 128)
>>     offset + len < 128
> Again, offset + len <= 128
>
>> else
>>     offset + len < 256
> offset + len <= 256
Ack.
>> Does that look better?
>>
>> Reading bytes from the lower 1/2 of page 0 should give the same data as
>> reading data from the lower 1/2 of page 42. So we can allow that, but
> don't
>> be too surprised when an SFP gets it wrong and gives you rubbish. I would
> The spec is clear that the lower half is the same for all pages.  If the SFP
> gives you rubbish you should throw the device in the rubbish.
>
>> suggest ethtool(1) never actually does read from the lower 1/2 of any page
>> other than 0.
> I agree, despite my previous comment.  While the spec is clear that should
> work, I believe virtually all such instances are bugs not yet discovered.


Agreed, so we will accept offset < 128 only on page zero.

> And, note that the legacy API provides no way to access lower memory from
> any page but 0.  There's just no syntax for it.  Not that we care about
> legacy :-).
>
>> And i agree about documentation. I would suggest a comment in
>> ethtool_netlink.h, and the RST documentation.


Ack, will comment there on limiting new KAPI only to half pages reading.

We may address reading cross pages by user space (implementing multiple 
calls to KAPI to get such data).

>>                   Andrew
> Don
>
>
Andrew Lunn March 24, 2021, 12:15 p.m. UTC | #11
> OK, so following the comments here, I will ignore backward compatibility of
> having global offset and length.

Yes, we can do that in userspace.

> That means I assume in this KAPI I am asked to get data from specific page.
> Should I also require user space to send page number to this KAPI (I mean
> make page number mandatory) ?

It makes the API more explicit. We always need the page, so if it is
not passed you need to default to something. As with addresses, you
have no way to pass back what page was actually read. So yes, make it
mandatory.

And i suppose the next question is, do we make bank mandatory? Once
you have a page > 0x10, you need the bank. Either we need to encode
the logic of when a bank is needed, and make it mandatory then, or it
is always mandatory.

Given the general pattern, we make it mandatory.

But sometime in the future when we get yet another SFP format, with
additional parameters, they will be optional, in order to keep
backwards compatibility.

	  Andrew
diff mbox series

Patch

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 05073482db05..b370d1186ceb 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1280,6 +1280,34 @@  Kernel response contents:
 For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES`` indicates that
 the table contains static entries, hard-coded by the NIC.
 
+MODULE_EEPROM
+===========
+
+Fetch module EEPROM data dump.
+
+Request contents:
+
+  =======================================  ======  ==========================
+  ``ETHTOOL_A_MODULE_EEPROM_HEADER``       nested  request header
+  ``ETHTOOL_A_MODULE_EEPROM_OFFSET``       u32     offset within a page
+  ``ETHTOOL_A_MODULE_EEPROM_LENGTH``       u32     amount of bytes to read
+  ``ETHTOOL_A_MODULE_EEPROM_PAGE``         u8      page number
+  ``ETHTOOL_A_MODULE_EEPROM_BANK``         u8      bank number
+  ``ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS``  u8      page I2C address
+  =======================================  ======  ==========================
+
+Kernel response contents:
+
+ +---------------------------------------------+--------+---------------------+
+ | ``ETHTOOL_A_MODULE_EEPROM_HEADER``          | nested | reply header        |
+ +---------------------------------------------+--------+---------------------+
+ | ``ETHTOOL_A_MODULE_EEPROM_DATA``            | nested | array of bytes from |
+ |                                             |        | module EEPROM       |
+ +---------------------------------------------+--------+---------------------+
+
+``ETHTOOL_A_MODULE_EEPROM_DATA`` has an attribute length equal to the amount of
+bytes driver actually read.
+
 Request translation
 ===================
 
@@ -1357,8 +1385,8 @@  are netlink only.
   ``ETHTOOL_GET_DUMP_FLAG``           n/a
   ``ETHTOOL_GET_DUMP_DATA``           n/a
   ``ETHTOOL_GET_TS_INFO``             ``ETHTOOL_MSG_TSINFO_GET``
-  ``ETHTOOL_GMODULEINFO``             n/a
-  ``ETHTOOL_GMODULEEEPROM``           n/a
+  ``ETHTOOL_GMODULEINFO``             ``ETHTOOL_MSG_MODULE_EEPROM_GET``
+  ``ETHTOOL_GMODULEEEPROM``           ``ETHTOOL_MSG_MODULE_EEPROM_GET``
   ``ETHTOOL_GEEE``                    ``ETHTOOL_MSG_EEE_GET``
   ``ETHTOOL_SEEE``                    ``ETHTOOL_MSG_EEE_SET``
   ``ETHTOOL_GRSSH``                   n/a
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index ec4cd3921c67..2f6df3b75dd6 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -81,6 +81,7 @@  enum {
 #define ETH_RSS_HASH_NO_CHANGE	0
 
 struct net_device;
+struct netlink_ext_ack;
 
 /* Some generic methods drivers may use in their ethtool_ops */
 u32 ethtool_op_get_link(struct net_device *dev);
@@ -265,6 +266,31 @@  struct ethtool_pause_stats {
 	u64 rx_pause_frames;
 };
 
+#define ETH_MODULE_EEPROM_PAGE_LEN	256
+#define ETH_MODULE_MAX_I2C_ADDRESS	0x7f
+
+/**
+ * struct ethtool_eeprom_data - EEPROM dump from specified page
+ * @offset: Offset within the specified EEPROM page to begin read, in bytes.
+ * @length: Number of bytes to read.
+ * @page: Page number to read from.
+ * @bank: Page bank number to read from, if applicable by EEPROM spec.
+ * @i2c_address: I2C address of a page. Value less than 0x7f expected. Most
+ *	EEPROMs use 0x50 or 0x51.
+ * @data: Pointer to buffer with EEPROM data of @length size.
+ *
+ * This can be used to manage pages during EEPROM dump in ethtool and pass
+ * required information to the driver.
+ */
+struct ethtool_module_eeprom {
+	__u32	offset;
+	__u32	length;
+	__u8	page;
+	__u8	bank;
+	__u8	i2c_address;
+	__u8	*data;
+};
+
 /**
  * struct ethtool_ops - optional netdev operations
  * @cap_link_lanes_supported: indicates if the driver supports lanes
@@ -410,6 +436,9 @@  struct ethtool_pause_stats {
  * @get_ethtool_phy_stats: Return extended statistics about the PHY device.
  *	This is only useful if the device maintains PHY statistics and
  *	cannot use the standard PHY library helpers.
+ * @get_module_eeprom_by_page: Get a region of plug-in module EEPROM data from
+ *	specified page. Returns a negative error code or the amount of bytes
+ *	read.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
@@ -515,6 +544,9 @@  struct ethtool_ops {
 				   const struct ethtool_tunable *, void *);
 	int	(*set_phy_tunable)(struct net_device *,
 				   const struct ethtool_tunable *, const void *);
+	int	(*get_module_eeprom_by_page)(struct net_device *dev,
+					     const struct ethtool_module_eeprom *page,
+					     struct netlink_ext_ack *extack);
 };
 
 int ethtool_check_ops(const struct ethtool_ops *ops);
@@ -538,7 +570,6 @@  int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
 				       const struct ethtool_link_ksettings *cmd,
 				       u32 *dev_speed, u8 *dev_duplex);
 
-struct netlink_ext_ack;
 struct phy_device;
 struct phy_tdr_config;
 
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index a286635ac9b8..c99dea5afbb2 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -42,6 +42,7 @@  enum {
 	ETHTOOL_MSG_CABLE_TEST_ACT,
 	ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
 	ETHTOOL_MSG_TUNNEL_INFO_GET,
+	ETHTOOL_MSG_MODULE_EEPROM_GET,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -80,6 +81,7 @@  enum {
 	ETHTOOL_MSG_CABLE_TEST_NTF,
 	ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
 	ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
+	ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -629,6 +631,23 @@  enum {
 	ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)
 };
 
+/* MODULE EEPROM */
+
+enum {
+	ETHTOOL_A_MODULE_EEPROM_UNSPEC,
+	ETHTOOL_A_MODULE_EEPROM_HEADER,			/* nest - _A_HEADER_* */
+
+	ETHTOOL_A_MODULE_EEPROM_OFFSET,			/* u32 */
+	ETHTOOL_A_MODULE_EEPROM_LENGTH,			/* u32 */
+	ETHTOOL_A_MODULE_EEPROM_PAGE,			/* u8 */
+	ETHTOOL_A_MODULE_EEPROM_BANK,			/* u8 */
+	ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS,		/* u8 */
+	ETHTOOL_A_MODULE_EEPROM_DATA,			/* nested */
+
+	__ETHTOOL_A_MODULE_EEPROM_CNT,
+	ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 7a849ff22dad..d604346bc074 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,4 +7,4 @@  obj-$(CONFIG_ETHTOOL_NETLINK)	+= ethtool_nl.o
 ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
 		   linkstate.o debug.o wol.o features.o privflags.o rings.o \
 		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
-		   tunnels.o
+		   tunnels.o eeprom.o
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
new file mode 100644
index 000000000000..79d75e0e2391
--- /dev/null
+++ b/net/ethtool/eeprom.c
@@ -0,0 +1,161 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool.h>
+#include "netlink.h"
+#include "common.h"
+
+struct eeprom_req_info {
+	struct ethnl_req_info	base;
+	u32			offset;
+	u32			length;
+	u8			page;
+	u8			bank;
+	u8			i2c_address;
+};
+
+struct eeprom_reply_data {
+	struct ethnl_reply_data base;
+	u32			length;
+	u8			*data;
+};
+
+/* Module EEPROMs use one byte to store page number, while each page is 256
+ * bytes long, so in principle offset is limited by a multiple of that,
+ * plus the length of an additional SFP page located at high I2C address.
+ */
+#define MODULE_EEPROM_MAX_OFFSET (257 * ETH_MODULE_EEPROM_PAGE_LEN)
+
+#define MODULE_EEPROM_REQINFO(__req_base) \
+	container_of(__req_base, struct eeprom_req_info, base)
+
+#define MODULE_EEPROM_REPDATA(__reply_base) \
+	container_of(__reply_base, struct eeprom_reply_data, base)
+
+static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
+			       struct ethnl_reply_data *reply_base,
+			       struct genl_info *info)
+{
+	struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base);
+	struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base);
+	struct ethtool_module_eeprom page_data = {0};
+	struct net_device *dev = reply_base->dev;
+	int ret;
+
+	if (!dev->ethtool_ops->get_module_eeprom_by_page)
+		return -EOPNOTSUPP;
+
+	/* Allow dumps either of low or high page without crossing half page boundary */
+	if ((request->offset < ETH_MODULE_EEPROM_PAGE_LEN / 2 &&
+	     request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN / 2) ||
+	    request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN)
+		return -EINVAL;
+
+	page_data.offset = request->offset;
+	page_data.length = request->length;
+	page_data.i2c_address = request->i2c_address;
+	page_data.page = request->page;
+	page_data.bank = request->bank;
+	page_data.data = kmalloc(page_data.length, GFP_KERNEL);
+	if (!page_data.data)
+		return -ENOMEM;
+
+	ret = ethnl_ops_begin(dev);
+	if (ret)
+		goto err_free;
+
+	ret = dev->ethtool_ops->get_module_eeprom_by_page(dev, &page_data,
+							  info->extack);
+	if (ret < 0)
+		goto err_ops;
+
+	reply->length = ret;
+	reply->data = page_data.data;
+
+	ethnl_ops_complete(dev);
+	return 0;
+
+err_ops:
+	ethnl_ops_complete(dev);
+err_free:
+	kfree(page_data.data);
+	return ret;
+}
+
+static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
+				struct netlink_ext_ack *extack)
+{
+	struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_info);
+
+	if (!tb[ETHTOOL_A_MODULE_EEPROM_OFFSET] ||
+	    !tb[ETHTOOL_A_MODULE_EEPROM_LENGTH] ||
+	    !tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS])
+		return -EINVAL;
+
+	request->i2c_address = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]);
+	request->offset = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_OFFSET]);
+	request->length = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_LENGTH]);
+
+	if (request->offset > MODULE_EEPROM_MAX_OFFSET)
+		return -EINVAL;
+
+	if (tb[ETHTOOL_A_MODULE_EEPROM_PAGE]) {
+		request->page = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_PAGE]);
+		if (request->page > 0 &&
+		    (request->offset < ETH_MODULE_EEPROM_PAGE_LEN / 2 ||
+		     request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN))
+			return -EINVAL;
+	}
+	if (tb[ETHTOOL_A_MODULE_EEPROM_BANK])
+		request->bank = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_BANK]);
+
+	return 0;
+}
+
+static int eeprom_reply_size(const struct ethnl_req_info *req_base,
+			     const struct ethnl_reply_data *reply_base)
+{
+	const struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base);
+
+	return nla_total_size(sizeof(u8) * request->length); /* _EEPROM_DATA */
+}
+
+static int eeprom_fill_reply(struct sk_buff *skb,
+			     const struct ethnl_req_info *req_base,
+			     const struct ethnl_reply_data *reply_base)
+{
+	struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base);
+
+	return nla_put(skb, ETHTOOL_A_MODULE_EEPROM_DATA, reply->length, reply->data);
+}
+
+static void eeprom_cleanup_data(struct ethnl_reply_data *reply_base)
+{
+	struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base);
+
+	kfree(reply->data);
+}
+
+const struct ethnl_request_ops ethnl_module_eeprom_request_ops = {
+	.request_cmd		= ETHTOOL_MSG_MODULE_EEPROM_GET,
+	.reply_cmd		= ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
+	.hdr_attr		= ETHTOOL_A_MODULE_EEPROM_HEADER,
+	.req_info_size		= sizeof(struct eeprom_req_info),
+	.reply_data_size	= sizeof(struct eeprom_reply_data),
+
+	.parse_request		= eeprom_parse_request,
+	.prepare_data		= eeprom_prepare_data,
+	.reply_size		= eeprom_reply_size,
+	.fill_reply		= eeprom_fill_reply,
+	.cleanup_data		= eeprom_cleanup_data,
+};
+
+const struct nla_policy ethnl_module_eeprom_get_policy[] = {
+	[ETHTOOL_A_MODULE_EEPROM_HEADER]	= NLA_POLICY_NESTED(ethnl_header_policy),
+	[ETHTOOL_A_MODULE_EEPROM_OFFSET]	= { .type = NLA_U32 },
+	[ETHTOOL_A_MODULE_EEPROM_LENGTH]	= { .type = NLA_U32 },
+	[ETHTOOL_A_MODULE_EEPROM_PAGE]		= { .type = NLA_U8 },
+	[ETHTOOL_A_MODULE_EEPROM_BANK]		= { .type = NLA_U8 },
+	[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]	=
+		NLA_POLICY_RANGE(NLA_U8, 0, ETH_MODULE_MAX_I2C_ADDRESS),
+};
+
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 50d3c8896f91..52d1ffb1bca2 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -245,6 +245,7 @@  ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
 	[ETHTOOL_MSG_PAUSE_GET]		= &ethnl_pause_request_ops,
 	[ETHTOOL_MSG_EEE_GET]		= &ethnl_eee_request_ops,
 	[ETHTOOL_MSG_TSINFO_GET]	= &ethnl_tsinfo_request_ops,
+	[ETHTOOL_MSG_MODULE_EEPROM_GET]	= &ethnl_module_eeprom_request_ops,
 };
 
 static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -912,6 +913,15 @@  static const struct genl_ops ethtool_genl_ops[] = {
 		.policy = ethnl_tunnel_info_get_policy,
 		.maxattr = ARRAY_SIZE(ethnl_tunnel_info_get_policy) - 1,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_MODULE_EEPROM_GET,
+		.doit	= ethnl_default_doit,
+		.start	= ethnl_default_start,
+		.dumpit	= ethnl_default_dumpit,
+		.done	= ethnl_default_done,
+		.policy = ethnl_module_eeprom_get_policy,
+		.maxattr = ARRAY_SIZE(ethnl_module_eeprom_get_policy) - 1,
+	},
 };
 
 static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 6eabd58d81bf..71cbd9df8229 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -344,6 +344,7 @@  extern const struct ethnl_request_ops ethnl_coalesce_request_ops;
 extern const struct ethnl_request_ops ethnl_pause_request_ops;
 extern const struct ethnl_request_ops ethnl_eee_request_ops;
 extern const struct ethnl_request_ops ethnl_tsinfo_request_ops;
+extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops;
 
 extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
 extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -375,6 +376,7 @@  extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER +
 extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1];
 extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1];
 extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
+extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_DATA + 1];
 
 int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);