Message ID | 20240819223442.48013-3-anthony.l.nguyen@intel.com (mailing list archive)
---|---
State | Rejected
Delegated to | Netdev Maintainers
Series | idpf: XDP chapter II: convert Tx completion to libeth
On Mon, 19 Aug 2024 15:34:34 -0700 Tony Nguyen wrote:
> + * Return: new &net_device on success, %NULL on error.
> + */
> +struct net_device *__libeth_netdev_alloc(u32 priv, u32 rqs, u32 sqs,
> +                                         u32 xdpsqs)

The netdev alloc / free / set num queues can be a separate patch

> +MODULE_DESCRIPTION("Common Ethernet library");

BTW for Intel? Or you want this to be part of the core?
I thought Intel, but you should tell us if you have broader plans.

> +        const struct libeth_netdev_priv *priv = netdev_priv(dev);   \
> +                                                                     \
> +        memset(stats, 0, sizeof(*stats));                            \
> +        u64_stats_init(&stats->syncp);                               \
> +                                                                     \
> +        mutex_init(&priv->base_##pfx##s[qid].lock);                  \

the mutex is only for writes or for reads of base too?
mutex is a bad idea for rtnl stats

> +/* Netlink stats. Exported fields have the same names as in the NL structs */
> +
> +struct libeth_stats_export {
> +        u16     li;
> +        u16     gi;
> +};
> +
> +#define LIBETH_STATS_EXPORT(lpfx, gpfx, field) {                     \
> +        .li = (offsetof(struct libeth_##lpfx##_stats, field) -       \
> +               offsetof(struct libeth_##lpfx##_stats, raw)) /        \
> +              sizeof_field(struct libeth_##lpfx##_stats, field),     \
> +        .gi = offsetof(struct netdev_queue_stats_##gpfx, field) /    \
> +              sizeof_field(struct netdev_queue_stats_##gpfx, field)  \
> +}

humpf

> +#define LIBETH_STATS_DEFINE_EXPORT(pfx, gpfx)                        \
> +static void                                                          \
> +libeth_get_queue_stats_##gpfx(struct net_device *dev, int idx,       \
> +                              struct netdev_queue_stats_##gpfx *stats) \
> +{                                                                    \
> +        const struct libeth_netdev_priv *priv = netdev_priv(dev);    \
> +        const struct libeth_##pfx##_stats *qs;                       \
> +        u64 *raw = (u64 *)stats;                                     \
> +        u32 start;                                                   \
> +                                                                     \
> +        qs = READ_ONCE(priv->live_##pfx##s[idx]);                    \
> +        if (!qs)                                                     \
> +                return;                                              \
> +                                                                     \
> +        do {                                                         \
> +                start = u64_stats_fetch_begin(&qs->syncp);           \
> +                                                                     \
> +                libeth_stats_foreach_export(pfx, exp)                \
> +                        raw[exp->gi] = u64_stats_read(&qs->raw[exp->li]); \
> +        } while (u64_stats_fetch_retry(&qs->syncp, start));          \
> +}                                                                    \

ugh. Please no

> +                                                                     \
> +static void                                                          \
> +libeth_get_##pfx##_base_stats(const struct net_device *dev,          \
> +                              struct netdev_queue_stats_##gpfx *stats) \
> +{                                                                    \
> +        const struct libeth_netdev_priv *priv = netdev_priv(dev);    \
> +        u64 *raw = (u64 *)stats;                                     \
> +                                                                     \
> +        memset(stats, 0, sizeof(*(stats)));                          \

Have you read the docs for any of the recent stats APIs?

Nack. Just implement the APIs in the driver, this does not seem like
a sane starting point _at all_. You're going to waste more time coming
up with such abstraction than you'd save implementing it for 10 drivers.
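For context, the allocator under discussion is used by a driver roughly as in the sketch below. The ex_* names are hypothetical; libeth_netdev_alloc(), libeth_netdev_priv and libeth_netdev_priv_assert() come from the patch at the end of this thread.

        #include <net/libeth/netdev.h>
        #include <net/libeth/types.h>

        /* hypothetical driver private struct; the libeth priv must stay first */
        struct ex_priv {
                struct libeth_netdev_priv       lp;
                /* driver-private fields follow */
        };
        libeth_netdev_priv_assert(struct ex_priv, lp);

        static struct net_device *ex_alloc_netdev(u32 rxqs, u32 txqs, u32 xdpqs)
        {
                /* sizeof() of the whole priv struct, then the maximum queue counts */
                return libeth_netdev_alloc(sizeof(struct ex_priv), rxqs, txqs, xdpqs);
        }

Per the same patch, libeth_netdev_free() is the teardown counterpart and libeth_set_real_num_queues() is meant to be called on ifup.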
From: Jakub Kicinski <kuba@kernel.org> Date: Tue, 20 Aug 2024 18:17:57 -0700 > On Mon, 19 Aug 2024 15:34:34 -0700 Tony Nguyen wrote: >> + * Return: new &net_device on success, %NULL on error. >> + */ >> +struct net_device *__libeth_netdev_alloc(u32 priv, u32 rqs, u32 sqs, >> + u32 xdpsqs) > > The netdev alloc / free / set num queues can be a separate patch Ok sure. > >> +MODULE_DESCRIPTION("Common Ethernet library"); I just moved this line from libeth/rx.c :D > > BTW for Intel? Or you want this to be part of the core? > I thought Intel, but you should tell us if you have broader plans. For now it's done as a lib inside Intel folder, BUT if any other vendor would like to use this, that would be great and then we could move it level up or some of these APIs can go into the core. IOW depends on users. libie in contrary contains HW-specific code and will always be Intel-specific. > >> + const struct libeth_netdev_priv *priv = netdev_priv(dev); \ >> + \ >> + memset(stats, 0, sizeof(*stats)); \ >> + u64_stats_init(&stats->syncp); \ >> + \ >> + mutex_init(&priv->base_##pfx##s[qid].lock); \ > > the mutex is only for writes or for reads of base too? > mutex is a bad idea for rtnl stats Base stats are written only on ifdown, read anytime, mutex is used everywhere. Hmm maybe a bad idea, what would be better, spinlock or just use u64_sync as well? > >> +/* Netlink stats. Exported fields have the same names as in the NL structs */ >> + >> +struct libeth_stats_export { >> + u16 li; >> + u16 gi; >> +}; >> + >> +#define LIBETH_STATS_EXPORT(lpfx, gpfx, field) { \ >> + .li = (offsetof(struct libeth_##lpfx##_stats, field) - \ >> + offsetof(struct libeth_##lpfx##_stats, raw)) / \ >> + sizeof_field(struct libeth_##lpfx##_stats, field), \ >> + .gi = offsetof(struct netdev_queue_stats_##gpfx, field) / \ >> + sizeof_field(struct netdev_queue_stats_##gpfx, field) \ >> +} > > humpf Compression :D > >> +#define LIBETH_STATS_DEFINE_EXPORT(pfx, gpfx) \ >> +static void \ >> +libeth_get_queue_stats_##gpfx(struct net_device *dev, int idx, \ >> + struct netdev_queue_stats_##gpfx *stats) \ >> +{ \ >> + const struct libeth_netdev_priv *priv = netdev_priv(dev); \ >> + const struct libeth_##pfx##_stats *qs; \ >> + u64 *raw = (u64 *)stats; \ >> + u32 start; \ >> + \ >> + qs = READ_ONCE(priv->live_##pfx##s[idx]); \ >> + if (!qs) \ >> + return; \ >> + \ >> + do { \ >> + start = u64_stats_fetch_begin(&qs->syncp); \ >> + \ >> + libeth_stats_foreach_export(pfx, exp) \ >> + raw[exp->gi] = u64_stats_read(&qs->raw[exp->li]); \ >> + } while (u64_stats_fetch_retry(&qs->syncp, start)); \ >> +} \ > > ugh. Please no So you mean just open-code reads/writes per each field than to compress it that way? Sure, that would be no problem. Object code doesn't even change (my first approach was per-field). > >> + \ >> +static void \ >> +libeth_get_##pfx##_base_stats(const struct net_device *dev, \ >> + struct netdev_queue_stats_##gpfx *stats) \ >> +{ \ >> + const struct libeth_netdev_priv *priv = netdev_priv(dev); \ >> + u64 *raw = (u64 *)stats; \ >> + \ >> + memset(stats, 0, sizeof(*(stats))); \ > > Have you read the docs for any of the recent stats APIs? You mean to leave 0xffs for unsupported fields? > > Nack. Just implement the APIs in the driver, this does not seem like > a sane starting point _at all_. You're going to waste more time coming > up with such abstraction than you'd save implementing it for 10 drivers. I believe this nack is for generic Netlink stats, not the whole, right? 
In general, I wasn't sure whether it would be better to leave the Netlink stats per driver or implement them in libeth, so I wanted to hear others' opinions. I'm fine with either way.

Thanks,
Olek
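For reference, the u64_stats_sync option Olek floats above (instead of a mutex or spinlock around the base counters) is the usual lockless pattern; a minimal sketch with made-up names, not code from the series:

        #include <linux/u64_stats_sync.h>

        /* hypothetical per-queue "base" container protected by u64_stats_sync */
        struct ex_base_stats {
                struct u64_stats_sync   syncp;
                u64_stats_t             packets;
                u64_stats_t             bytes;
        };

        /* writer side, e.g. folding a queue's live counters in on ifdown */
        static void ex_base_stats_add(struct ex_base_stats *base, u64 packets, u64 bytes)
        {
                u64_stats_update_begin(&base->syncp);
                u64_stats_add(&base->packets, packets);
                u64_stats_add(&base->bytes, bytes);
                u64_stats_update_end(&base->syncp);
        }

        /* reader side, e.g. from .get_base_stats or rtnl stats */
        static void ex_base_stats_read(const struct ex_base_stats *base,
                                       u64 *packets, u64 *bytes)
        {
                unsigned int start;

                do {
                        start = u64_stats_fetch_begin(&base->syncp);
                        *packets = u64_stats_read(&base->packets);
                        *bytes = u64_stats_read(&base->bytes);
                } while (u64_stats_fetch_retry(&base->syncp, start));
        }

Whether this beats a plain lock depends on the point Jakub raises next: how the driver aggregates rtnl stats and whether the readers may sleep.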
On Thu, 22 Aug 2024 17:13:57 +0200 Alexander Lobakin wrote: > > BTW for Intel? Or you want this to be part of the core? > > I thought Intel, but you should tell us if you have broader plans. > > For now it's done as a lib inside Intel folder, BUT if any other vendor > would like to use this, that would be great and then we could move it > level up or some of these APIs can go into the core. > IOW depends on users. > > libie in contrary contains HW-specific code and will always be > Intel-specific. Seems like an odd middle ground. If you believe it's generic finding another driver shouldn't be hard. > >> + const struct libeth_netdev_priv *priv = netdev_priv(dev); \ > >> + \ > >> + memset(stats, 0, sizeof(*stats)); \ > >> + u64_stats_init(&stats->syncp); \ > >> + \ > >> + mutex_init(&priv->base_##pfx##s[qid].lock); \ > > > > the mutex is only for writes or for reads of base too? > > mutex is a bad idea for rtnl stats > > Base stats are written only on ifdown, read anytime, mutex is used > everywhere. > Hmm maybe a bad idea, what would be better, spinlock or just use > u64_sync as well? Depends quite a bit on whether driver uses per queues stats to get rtnl stats. And whether reading of the stats needs to sleep. > >> +#define LIBETH_STATS_DEFINE_EXPORT(pfx, gpfx) \ > >> +static void \ > >> +libeth_get_queue_stats_##gpfx(struct net_device *dev, int idx, \ > >> + struct netdev_queue_stats_##gpfx *stats) \ > >> +{ \ > >> + const struct libeth_netdev_priv *priv = netdev_priv(dev); \ > >> + const struct libeth_##pfx##_stats *qs; \ > >> + u64 *raw = (u64 *)stats; \ > >> + u32 start; \ > >> + \ > >> + qs = READ_ONCE(priv->live_##pfx##s[idx]); \ > >> + if (!qs) \ > >> + return; \ > >> + \ > >> + do { \ > >> + start = u64_stats_fetch_begin(&qs->syncp); \ > >> + \ > >> + libeth_stats_foreach_export(pfx, exp) \ > >> + raw[exp->gi] = u64_stats_read(&qs->raw[exp->li]); \ > >> + } while (u64_stats_fetch_retry(&qs->syncp, start)); \ > >> +} \ > > > > ugh. Please no > > So you mean just open-code reads/writes per each field than to compress > it that way? Yes. <rant> I don't understand why people try to be clever and complicate stats reading for minor LoC saving (almost everyone, including those working on fbnic). Just type the code in -- it makes maintaining it, grepping and adding a new stat without remembering all the details soo much easier. </rant> > Sure, that would be no problem. Object code doesn't even > change (my first approach was per-field). > > >> + \ > >> +static void \ > >> +libeth_get_##pfx##_base_stats(const struct net_device *dev, \ > >> + struct netdev_queue_stats_##gpfx *stats) \ > >> +{ \ > >> + const struct libeth_netdev_priv *priv = netdev_priv(dev); \ > >> + u64 *raw = (u64 *)stats; \ > >> + \ > >> + memset(stats, 0, sizeof(*(stats))); \ > > > > Have you read the docs for any of the recent stats APIs? > > You mean to leave 0xffs for unsupported fields? Kinda of. But also I do mean to call out that you haven't read the doc for the interface over which you're building an abstraction
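To make "just type the code in" concrete, an open-coded per-queue read into the generic struct netdev_queue_stats_rx looks roughly like the sketch below. All drv_* names are hypothetical and only a handful of fields are shown; the point is that every exported counter is a plain, greppable assignment.

        #include <linux/netdevice.h>
        #include <linux/u64_stats_sync.h>
        #include <net/netdev_queues.h>

        /* hypothetical per-Rx-queue counters kept by the driver */
        struct drv_rq_stats {
                struct u64_stats_sync   syncp;
                u64_stats_t             packets;
                u64_stats_t             bytes;
                u64_stats_t             alloc_fail;
                u64_stats_t             csum_none;
        };

        struct drv_priv {
                struct drv_rq_stats     **rq_stats;     /* one pointer per Rx queue */
        };

        static void drv_get_queue_stats_rx(struct net_device *dev, int idx,
                                           struct netdev_queue_stats_rx *stats)
        {
                const struct drv_priv *priv = netdev_priv(dev);
                const struct drv_rq_stats *qs = READ_ONCE(priv->rq_stats[idx]);
                unsigned int start;

                if (!qs)
                        return;

                do {
                        start = u64_stats_fetch_begin(&qs->syncp);
                        stats->packets = u64_stats_read(&qs->packets);
                        stats->bytes = u64_stats_read(&qs->bytes);
                        stats->alloc_fail = u64_stats_read(&qs->alloc_fail);
                        stats->csum_none = u64_stats_read(&qs->csum_none);
                } while (u64_stats_fetch_retry(&qs->syncp, start));
        }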
From: Jakub Kicinski <kuba@kernel.org> Date: Thu, 22 Aug 2024 16:17:18 -0700 > On Thu, 22 Aug 2024 17:13:57 +0200 Alexander Lobakin wrote: >>> BTW for Intel? Or you want this to be part of the core? >>> I thought Intel, but you should tell us if you have broader plans. >> >> For now it's done as a lib inside Intel folder, BUT if any other vendor >> would like to use this, that would be great and then we could move it >> level up or some of these APIs can go into the core. >> IOW depends on users. >> >> libie in contrary contains HW-specific code and will always be >> Intel-specific. > > Seems like an odd middle ground. If you believe it's generic finding > another driver shouldn't be hard. But it's up to the vendors right, I can't force them to use this code or just switch their driver to use it :D [...] >> So you mean just open-code reads/writes per each field than to compress >> it that way? > > Yes. <rant> I don't understand why people try to be clever and > complicate stats reading for minor LoC saving (almost everyone, > including those working on fbnic). Just type the code in -- it > makes maintaining it, grepping and adding a new stat without > remembering all the details soo much easier. </rant> In some cases, not this one, iterating over an array means way less object code than open-coded per-field assignment. Just try do that for 50 fields and you'll see. > >> Sure, that would be no problem. Object code doesn't even >> change (my first approach was per-field). >> >>>> + \ >>>> +static void \ >>>> +libeth_get_##pfx##_base_stats(const struct net_device *dev, \ >>>> + struct netdev_queue_stats_##gpfx *stats) \ >>>> +{ \ >>>> + const struct libeth_netdev_priv *priv = netdev_priv(dev); \ >>>> + u64 *raw = (u64 *)stats; \ >>>> + \ >>>> + memset(stats, 0, sizeof(*(stats))); \ >>> >>> Have you read the docs for any of the recent stats APIs? >> >> You mean to leave 0xffs for unsupported fields? > > Kinda of. But also I do mean to call out that you haven't read the doc > for the interface over which you're building an abstraction
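The documentation point referenced above is the one Olek paraphrases ("leave 0xffs for unsupported fields"): the qstats core pre-initializes the structures, so a callback should only assign the counters the device actually maintains rather than memset()ing the whole struct to zero. A hedged sketch of the base-stats side, with hypothetical drv_* aggregates:

        /* hypothetical aggregates of counters from queues already freed on ifdown */
        struct drv_base_stats {
                u64     rx_packets;
                u64     rx_bytes;
                u64     rx_alloc_fail;
                u64     tx_packets;
                u64     tx_bytes;
        };

        static void drv_fill_base_stats(const struct drv_base_stats *base,
                                        struct netdev_queue_stats_rx *rx,
                                        struct netdev_queue_stats_tx *tx)
        {
                /* no memset(): counters the device doesn't support stay untouched,
                 * so the core keeps reporting them as "not collected"
                 */
                rx->packets = base->rx_packets;
                rx->bytes = base->rx_bytes;
                rx->alloc_fail = base->rx_alloc_fail;

                tx->packets = base->tx_packets;
                tx->bytes = base->tx_bytes;
                /* e.g. no tx->hw_gso_* here if the HW has no segmentation offload */
        }

The actual .get_base_stats callback would call such a helper with the aggregates pulled out of netdev_priv().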
On Fri, 23 Aug 2024 14:59:17 +0200 Alexander Lobakin wrote: > >> For now it's done as a lib inside Intel folder, BUT if any other vendor > >> would like to use this, that would be great and then we could move it > >> level up or some of these APIs can go into the core. > >> IOW depends on users. > >> > >> libie in contrary contains HW-specific code and will always be > >> Intel-specific. > > > > Seems like an odd middle ground. If you believe it's generic finding > > another driver shouldn't be hard. > > But it's up to the vendors right, I can't force them to use this code or > just switch their driver to use it :D It shouldn't be up to interpretation whether the library makes code better. If it doesn't -- what's the point of the library. If it does -- the other vendors better have a solid reason to push back. > >> So you mean just open-code reads/writes per each field than to compress > >> it that way? > > > > Yes. <rant> I don't understand why people try to be clever and > > complicate stats reading for minor LoC saving (almost everyone, > > including those working on fbnic). Just type the code in -- it > > makes maintaining it, grepping and adding a new stat without > > remembering all the details soo much easier. </rant> > > In some cases, not this one, iterating over an array means way less > object code than open-coded per-field assignment. Just try do that for > 50 fields and you'll see. Do you have numbers? How much binary code is 50 simple moves on x86? > >> You mean to leave 0xffs for unsupported fields? > > > > Kinda of. But also I do mean to call out that you haven't read the doc > > for the interface over which you're building an abstraction
From: Jakub Kicinski <kuba@kernel.org> Date: Mon, 26 Aug 2024 18:09:21 -0700 > On Fri, 23 Aug 2024 14:59:17 +0200 Alexander Lobakin wrote: >>>> For now it's done as a lib inside Intel folder, BUT if any other vendor >>>> would like to use this, that would be great and then we could move it >>>> level up or some of these APIs can go into the core. >>>> IOW depends on users. >>>> >>>> libie in contrary contains HW-specific code and will always be >>>> Intel-specific. >>> >>> Seems like an odd middle ground. If you believe it's generic finding >>> another driver shouldn't be hard. >> >> But it's up to the vendors right, I can't force them to use this code or >> just switch their driver to use it :D > > It shouldn't be up to interpretation whether the library makes code > better. If it doesn't -- what's the point of the library. If it does > -- the other vendors better have a solid reason to push back. Potential reasons to push back (by "we" I mean some vendor X): * we want our names for Ethtool stats, not yours ("txq_X" instead of "sqX" and so on), we have scripts which already parse our names blah * we have our own infra and we just don't want / have time/resources to refactor and then test since it's not something critical > >>>> So you mean just open-code reads/writes per each field than to compress >>>> it that way? >>> >>> Yes. <rant> I don't understand why people try to be clever and >>> complicate stats reading for minor LoC saving (almost everyone, >>> including those working on fbnic). Just type the code in -- it >>> makes maintaining it, grepping and adding a new stat without >>> remembering all the details soo much easier. </rant> >> >> In some cases, not this one, iterating over an array means way less >> object code than open-coded per-field assignment. Just try do that for >> 50 fields and you'll see. > > Do you have numbers? How much binary code is 50 simple moves on x86? for (u32 i = 0; i < 50; i++) structX->field[i] = something * i; open-coding this loop to assign 50 fields manually gives me +483 bytes of object code on -O2. But these two lines scale better than adding a new assignment for each new field (and then forget to do that for some field and debug why the stats are incorrect). > >>>> You mean to leave 0xffs for unsupported fields? >>> >>> Kinda of. But also I do mean to call out that you haven't read the doc >>> for the interface over which you're building an abstraction
On Tue, 27 Aug 2024 17:31:55 +0200 Alexander Lobakin wrote: > >> But it's up to the vendors right, I can't force them to use this code or > >> just switch their driver to use it :D > > > > It shouldn't be up to interpretation whether the library makes code > > better. If it doesn't -- what's the point of the library. If it does > > -- the other vendors better have a solid reason to push back. > > Potential reasons to push back (by "we" I mean some vendor X): > > * we want our names for Ethtool stats, not yours ("txq_X" instead of > "sqX" and so on), we have scripts which already parse our names blah If the stat is standardized in a library why is it dumped via ethtool -S > * we have our own infra and we just don't want / have time/resources to > refactor and then test since it's not something critical Quite hypothetical. > >> In some cases, not this one, iterating over an array means way less > >> object code than open-coded per-field assignment. Just try do that for > >> 50 fields and you'll see. > > > > Do you have numbers? How much binary code is 50 simple moves on x86? > > for (u32 i = 0; i < 50; i++) > structX->field[i] = something * i; > > open-coding this loop to assign 50 fields manually gives me +483 bytes > of object code on -O2. 10 bytes per stat then. The stat itself is 8 times number of queues. > But these two lines scale better than adding a new assignment for each > new field (and then forget to do that for some field and debug why the > stats are incorrect). Scale? This is control path code, first prio is for it to be correct, second to be readable. > >> But I have... > > > > Read, or saw it? > > Something in between these two, but I'll reread since you're insisting. Please do. Docs for *any* the stats I've added in the last 3 years will do. > >> But why should it propagate? > > > > I'm saying it shouldn't. The next NIC driver Intel (inevitably :)) > > FYI I already nack inside Intel any new drivers since I was promised > that each next generation will be based on top of idpf. I don't mind new drivers, how they get written is a bigger problem, but that's a different discussion. > >> I'll be happy to remove that for basic Rx/Tx queues (and leave only > >> those which don't exist yet in the NL stats) and when you introduce > >> more fields to NL stats, removing more from ethtool -S in this > >> library will be easy. > > > > I don't scale to remembering 1 easy thing for each driver we have. > > Introducing a new field is adding 1 line with its name to the macro > since everything else gets expanded from these macros anyway. Are you saying that people on your team struggle to add a statistic? #1 correctness, #2 readability. LoC only matters indirectly under #2. > >> But let's say what should we do with XDP Tx > >> queues? They're invisible to rtnl as they are past real_num_tx_queues. > > > > They go to ethtool -S today. It should be relatively easy to start > > reporting them. I didn't add them because I don't have a clear use > > case at the moment. > > The same as for regular Tx: debugging, imbalance etc. I'm not trying to imply they are useless. I just that I, subjectively, don't have a clear use case in Meta's production env. > > save space on all the stats I'm not implementing. > > The stats I introduced here are supported by most, if not every, modern > NIC drivers. Not supporting header split or HW GRO will save you 16 > bytes on the queue struct which I don't think is a game changer. You don't understand. I built some infra over the last 3 years. 
You didn't bother reading it. Now you pop our own thing to the side, extending ethtool -S which is _unusable_ in a multi-vendor, production environment. > >> * implementing NL stats in drivers, not here; not exporting NL stats > >> to ethtool -S > >> > >> A driver wants to export a field which is missing in the lib? It's a > >> couple lines to add it. Another driver doesn't support this field and > >> you want it to still be 0xff there? Already noted and I'm already > >> implementing a different model. > > > > I think it will be very useful if you could step back and put on paper > > what your goals are with this work, exactly. > > My goals: > > * reduce boilerplate code in drivers: declaring stats structures, > Ethtool stats names, all these collecting, aggregating etc etc, you see > in the last commit of the series how many LoCs get deleted from idpf, > +/- the same amount would be removed from any other driver 21 files changed, 1634 insertions(+), 1002 deletions(-) > * reduce the time people debug and fix bugs in stats since it will be > just in one place, not in each driver Examples ? > * have more consistent names in ethtool -S Forget it, better infra exists. Your hack to make stat count "consistent" doesn't work either. Unless you assume only one process can call ethtool -S at a time. > * have more consistent stats sets in drivers since even within Intel > drivers it's a bit of a mess which stats are exported etc. Consistently undocumented :( qstats exist and are documented. > Most of your pushback here sounds like if I would try to introduce this > in the core code, but I don't do that here. This infra saves a lot of > locs s/saves/may save/ ? > and time when used in the Intel drivers and it would be totally > fine for me if some pieces of the lib goes into the core, but these > stats don't. > I didn't declare anywhere that everyone must use it or that it's core > code, do you want me to change this MODULE_DESCRIPTION()? I deeply dislike the macro templating. Complex local systems of this sort make it really painful to do cross-driver changes. It's fine for you because you wrote this, but speaking from experience (mlx5) it makes modifying the driver for an outside much harder. If you think the core infra is lacking, please improve it instead of inventing your own, buggy one.
From: Jakub Kicinski <kuba@kernel.org> Date: Tue, 27 Aug 2024 11:29:33 -0700 > On Tue, 27 Aug 2024 17:31:55 +0200 Alexander Lobakin wrote: >>>> But it's up to the vendors right, I can't force them to use this code or >>>> just switch their driver to use it :D >>> >>> It shouldn't be up to interpretation whether the library makes code >>> better. If it doesn't -- what's the point of the library. If it does >>> -- the other vendors better have a solid reason to push back. >> >> Potential reasons to push back (by "we" I mean some vendor X): >> >> * we want our names for Ethtool stats, not yours ("txq_X" instead of >> "sqX" and so on), we have scripts which already parse our names blah > > If the stat is standardized in a library why is it dumped via ethtool -S So do you mean that I need to make these stats generic Netlink instead of adding Ethtool stats? [...] >> FYI I already nack inside Intel any new drivers since I was promised >> that each next generation will be based on top of idpf. > > I don't mind new drivers, how they get written is a bigger problem, > but that's a different discussion. > >>>> I'll be happy to remove that for basic Rx/Tx queues (and leave only >>>> those which don't exist yet in the NL stats) and when you introduce >>>> more fields to NL stats, removing more from ethtool -S in this >>>> library will be easy. >>> >>> I don't scale to remembering 1 easy thing for each driver we have. >> >> Introducing a new field is adding 1 line with its name to the macro >> since everything else gets expanded from these macros anyway. > > Are you saying that people on your team struggle to add a statistic? For sure no. I just let the preprocessor do mechanical things instead of manual retyping stuff. > #1 correctness, #2 readability. LoC only matters indirectly under #2. #1 -- manual retyping is more error-prone? #2 -- gcc -E ? Anyway, just side notes, I get what you're saying and partially agree. > >>>> But let's say what should we do with XDP Tx >>>> queues? They're invisible to rtnl as they are past real_num_tx_queues. >>> >>> They go to ethtool -S today. It should be relatively easy to start >>> reporting them. I didn't add them because I don't have a clear use >>> case at the moment. >> >> The same as for regular Tx: debugging, imbalance etc. > > I'm not trying to imply they are useless. I just that I, subjectively, > don't have a clear use case in Meta's production env. > >>> save space on all the stats I'm not implementing. >> >> The stats I introduced here are supported by most, if not every, modern >> NIC drivers. Not supporting header split or HW GRO will save you 16 >> bytes on the queue struct which I don't think is a game changer. > > You don't understand. I built some infra over the last 3 years. > You didn't bother reading it. Now you pop our own thing to the side, > extending ethtool -S which is _unusable_ in a multi-vendor, production > environment. I read everything at the time you introduced it. I remember Ethernet standard stats, rmon, per-queue generic stats etc. I respect it and I like it. So just let me repeat my question so that all misunderstandings are gone: did I get it correctly that instead of adding Ethtool stats, I need to add new fields to the per-queue Netlink stats? I clearly don't have any issues with that and I'll be happy to drop Ethtool stats from the lib at all. (except XDP stats, they still go to ethtool -S for now? Or should I try making them generic as well?) 
> >>>> * implementing NL stats in drivers, not here; not exporting NL stats >>>> to ethtool -S >>>> >>>> A driver wants to export a field which is missing in the lib? It's a >>>> couple lines to add it. Another driver doesn't support this field and >>>> you want it to still be 0xff there? Already noted and I'm already >>>> implementing a different model. >>> >>> I think it will be very useful if you could step back and put on paper >>> what your goals are with this work, exactly. >> >> My goals: >> >> * reduce boilerplate code in drivers: declaring stats structures, >> Ethtool stats names, all these collecting, aggregating etc etc, you see >> in the last commit of the series how many LoCs get deleted from idpf, >> +/- the same amount would be removed from any other driver > > 21 files changed, 1634 insertions(+), 1002 deletions(-) Did you notice my "in the last commit"? 8 changed files with 232 additions and 691 deletions. > >> * reduce the time people debug and fix bugs in stats since it will be >> just in one place, not in each driver > > Examples ? Eeeh I remember there were commits as between the drivers, the logic to count some stats was inconsistent? But I don't have any links, so this is not an argument. But I also fixed bugs a couple time which were the same in several Intel drivers, that can always happen. > >> * have more consistent names in ethtool -S > > Forget it, better infra exists. Your hack to make stat count > "consistent" doesn't work either. Unless you assume only one process > can call ethtool -S at a time. > >> * have more consistent stats sets in drivers since even within Intel >> drivers it's a bit of a mess which stats are exported etc. > > Consistently undocumented :( qstats exist and are documented. > >> Most of your pushback here sounds like if I would try to introduce this >> in the core code, but I don't do that here. This infra saves a lot of >> locs > > s/saves/may save/ ? Series for more drivers are in progress, they really remove way more than add. > >> and time when used in the Intel drivers and it would be totally >> fine for me if some pieces of the lib goes into the core, but these >> stats don't. >> I didn't declare anywhere that everyone must use it or that it's core >> code, do you want me to change this MODULE_DESCRIPTION()? > > I deeply dislike the macro templating. Complex local systems of this > sort make it really painful to do cross-driver changes. It's fine for > you because you wrote this, but speaking from experience (mlx5) it makes > modifying the driver for an outside much harder. > > If you think the core infra is lacking, please improve it instead of > inventing your own, buggy one. Don't get me wrong, I did it via Ethtool not because I don't like your infra or so, strongly the opposite. Maybe it was just something like "I did it thousand times so I automatically did this that way for the 1001th time", or just because I wanted to deduplicate the Intel code and there, all the stats are in the Ethtool only, dunno =\ Thanks, Olek
On Wed, 28 Aug 2024 17:06:17 +0200 Alexander Lobakin wrote: > >> The stats I introduced here are supported by most, if not every, modern > >> NIC drivers. Not supporting header split or HW GRO will save you 16 > >> bytes on the queue struct which I don't think is a game changer. > > > > You don't understand. I built some infra over the last 3 years. > > You didn't bother reading it. Now you pop our own thing to the side, > > extending ethtool -S which is _unusable_ in a multi-vendor, production > > environment. > > I read everything at the time you introduced it. I remember Ethernet > standard stats, rmon, per-queue generic stats etc. I respect it and I > like it. > So just let me repeat my question so that all misunderstandings are > gone: did I get it correctly that instead of adding Ethtool stats, I > need to add new fields to the per-queue Netlink stats? I clearly don't > have any issues with that and I'll be happy to drop Ethtool stats from > the lib at all. That's half of it, the other half is excess of macro magic. > (except XDP stats, they still go to ethtool -S for now? Or should I try > making them generic as well?) Either way is fine by me. You may want to float the XDP stats first as a smaller series, just extending the spec and exposing from some driver already implementing qstat. In case someone else does object. > >> * reduce boilerplate code in drivers: declaring stats structures, > >> Ethtool stats names, all these collecting, aggregating etc etc, you see > >> in the last commit of the series how many LoCs get deleted from idpf, > >> +/- the same amount would be removed from any other driver > > > > 21 files changed, 1634 insertions(+), 1002 deletions(-) > > Did you notice my "in the last commit"? I may not have. But as you said, most drivers end up with some level of boilerplate around stats. So unless the stuff is used by more than one driver, and the savings are realized, the argument about saving LoC has to be heavily discounted. Queue xkcd 927. I should reiterate that I don't think LoC saving is a strong argument in the first place.
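Mechanically, moving from ethtool -S to the per-queue Netlink stats means wiring the callbacks into dev->stat_ops, which the posted patch already does for its generated helpers. A minimal driver-side sketch, reusing the hypothetical drv_* callbacks from the earlier sketches (the Tx and base ones are assumed to mirror the Rx one):

        #include <net/netdev_queues.h>

        /* drv_get_queue_stats_rx() is the open-coded callback sketched earlier;
         * drv_get_queue_stats_tx() and drv_get_base_stats() follow the same pattern.
         */
        static const struct netdev_stat_ops drv_stat_ops = {
                .get_queue_stats_rx     = drv_get_queue_stats_rx,
                .get_queue_stats_tx     = drv_get_queue_stats_tx,
                .get_base_stats         = drv_get_base_stats,
        };

        static void drv_init_stats(struct net_device *dev)
        {
                /* makes the counters visible via the netdev genl per-queue stats dump */
                dev->stat_ops = &drv_stat_ops;
        }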
From: Jakub Kicinski <kuba@kernel.org> Date: Wed, 28 Aug 2024 13:22:35 -0700 > On Wed, 28 Aug 2024 17:06:17 +0200 Alexander Lobakin wrote: >>>> The stats I introduced here are supported by most, if not every, modern >>>> NIC drivers. Not supporting header split or HW GRO will save you 16 >>>> bytes on the queue struct which I don't think is a game changer. >>> >>> You don't understand. I built some infra over the last 3 years. >>> You didn't bother reading it. Now you pop our own thing to the side, >>> extending ethtool -S which is _unusable_ in a multi-vendor, production >>> environment. >> >> I read everything at the time you introduced it. I remember Ethernet >> standard stats, rmon, per-queue generic stats etc. I respect it and I >> like it. >> So just let me repeat my question so that all misunderstandings are >> gone: did I get it correctly that instead of adding Ethtool stats, I >> need to add new fields to the per-queue Netlink stats? I clearly don't >> have any issues with that and I'll be happy to drop Ethtool stats from >> the lib at all. > > That's half of it, the other half is excess of macro magic. > >> (except XDP stats, they still go to ethtool -S for now? Or should I try >> making them generic as well?) > > Either way is fine by me. You may want to float the XDP stats first as > a smaller series, just extending the spec and exposing from some driver > already implementing qstat. In case someone else does object. I think I'll do that the following way. To not delay this series and XDP for idpf in general, I'll drop these stats for now, leaving only onstack containers (they will be used in libeth_xdp etc.), without the macro magic. But at the same time I'll work on extending the NL queue stats, incl. XDP ones, and send them separately when they're done. Would that be fine? > >>>> * reduce boilerplate code in drivers: declaring stats structures, >>>> Ethtool stats names, all these collecting, aggregating etc etc, you see >>>> in the last commit of the series how many LoCs get deleted from idpf, >>>> +/- the same amount would be removed from any other driver >>> >>> 21 files changed, 1634 insertions(+), 1002 deletions(-) >> >> Did you notice my "in the last commit"? > > I may not have. But as you said, most drivers end up with some level of > boilerplate around stats. So unless the stuff is used by more than one > driver, and the savings are realized, the argument about saving LoC has > to be heavily discounted. Queue xkcd 927. I should reiterate that > I don't think LoC saving is a strong argument in the first place. Yes, I got that. In general, you're right this hurts readability a lot. Thanks, Olek
On Thu, 29 Aug 2024 14:01:06 +0200 Alexander Lobakin wrote:
> > Either way is fine by me. You may want to float the XDP stats first as
> > a smaller series, just extending the spec and exposing from some driver
> > already implementing qstat. In case someone else does object.
>
> I think I'll do that the following way. To not delay this series and XDP
> for idpf in general, I'll drop these stats for now, leaving only onstack
> containers (they will be used in libeth_xdp etc.), without the macro
> magic. But at the same time I'll work on extending the NL queue stats,
> incl. XDP ones, and send them separately when they're done. Would that
> be fine?

Modulo implementation details -- SGTM!
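The "onstack containers" mentioned above are the pattern described in the stats.h hunk of the patch below: hot counters are accumulated in plain u32 fields on the stack during a poll and folded into the queue's u64_stats-protected container once per cycle. A stripped-down sketch, with drv_rx_one() standing in for a hypothetical per-frame handler:

        struct drv_rx_ring {
                /* descriptors, page pool, etc. */
                struct drv_rq_stats     stats;  /* per-queue container from the earlier sketch */
        };

        struct drv_rx_onstack {
                u32     packets;
                u32     bytes;
        };

        static int drv_rx_poll(struct drv_rx_ring *ring, int budget)
        {
                struct drv_rx_onstack os = { };
                int done;

                for (done = 0; done < budget; done++) {
                        u32 len = drv_rx_one(ring);     /* hypothetical per-frame handler */

                        if (!len)
                                break;

                        os.packets++;
                        os.bytes += len;
                }

                /* one u64_stats update section per poll instead of one per frame */
                u64_stats_update_begin(&ring->stats.syncp);
                u64_stats_add(&ring->stats.packets, os.packets);
                u64_stats_add(&ring->stats.bytes, os.bytes);
                u64_stats_update_end(&ring->stats.syncp);

                return done;
        }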
diff --git a/drivers/net/ethernet/intel/libeth/Makefile b/drivers/net/ethernet/intel/libeth/Makefile index 52492b081132..b30a2804554f 100644 --- a/drivers/net/ethernet/intel/libeth/Makefile +++ b/drivers/net/ethernet/intel/libeth/Makefile @@ -3,4 +3,6 @@ obj-$(CONFIG_LIBETH) += libeth.o -libeth-y := rx.o +libeth-y += netdev.o +libeth-y += rx.o +libeth-y += stats.o diff --git a/drivers/net/ethernet/intel/libeth/netdev.c b/drivers/net/ethernet/intel/libeth/netdev.c new file mode 100644 index 000000000000..6115472b3bb6 --- /dev/null +++ b/drivers/net/ethernet/intel/libeth/netdev.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Intel Corporation */ + +#include <linux/etherdevice.h> +#include <linux/ethtool.h> + +#include <net/libeth/netdev.h> +#include <net/libeth/types.h> + +#include "priv.h" + +/** + * __libeth_netdev_alloc - allocate a &net_device with libeth generic stats + * @priv: sizeof() of the private structure with embedded &libeth_netdev_priv + * @rqs: maximum number of Rx queues to be used + * @sqs: maximum number of Tx queues to be used + * @xdpsqs: maximum number of XDP Tx queues to be used + * + * Allocates a new &net_device and initializes the embedded &libeth_netdev_priv + * and the libeth generic stats for it. + * Use the non-underscored wrapper in drivers instead. + * + * Return: new &net_device on success, %NULL on error. + */ +struct net_device *__libeth_netdev_alloc(u32 priv, u32 rqs, u32 sqs, + u32 xdpsqs) +{ + struct net_device *dev; + + dev = alloc_etherdev_mqs(priv, sqs, rqs); + if (!dev) + return NULL; + + if (!libeth_stats_init_priv(dev, rqs, sqs, xdpsqs)) + goto err_netdev; + + return dev; + +err_netdev: + free_netdev(dev); + + return NULL; +} +EXPORT_SYMBOL_NS_GPL(__libeth_netdev_alloc, LIBETH); + +/** + * libeth_netdev_free - free a &net_device with libeth generic stats + * @dev: &net_device to free + * + * Deinitializes and frees the embedded &libeth_netdev_priv and the netdev + * itself, to be used if @dev was allocated using libeth_netdev_alloc(). + */ +void libeth_netdev_free(struct net_device *dev) +{ + libeth_stats_free_priv(dev); + free_netdev(dev); +} +EXPORT_SYMBOL_NS_GPL(libeth_netdev_free, LIBETH); + +/** + * __libeth_set_real_num_queues - set the actual number of queues in use + * @dev: &net_device to configure + * @rqs: actual number of Rx queues + * @sqs: actual number of Tx queues + * @xdpsqs: actual number of XDP Tx queues + * + * Sets the actual number of queues in use, to be called on ifup for netdevs + * allocated via libeth_netdev_alloc(). + * Use the non-underscored wrapper in drivers instead. + * + * Return: %0 on success, -errno on error. + */ +int __libeth_set_real_num_queues(struct net_device *dev, u32 rqs, u32 sqs, + u32 xdpsqs) +{ + struct libeth_netdev_priv *priv = netdev_priv(dev); + int ret; + + ret = netif_set_real_num_rx_queues(dev, rqs); + if (ret) + return ret; + + ret = netif_set_real_num_tx_queues(dev, sqs); + if (ret) + return ret; + + priv->curr_xdpsqs = xdpsqs; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(__libeth_set_real_num_queues, LIBETH); + +/* Ethtool */ + +/** + * libeth_ethtool_get_sset_count - get the number of libeth generic stats + * @dev: libeth-driven &net_device + * @sset: ``ETH_SS_STATS`` only, for compatibility with Ethtool callbacks + * + * Can be used directly in ðtool_ops if the driver doesn't have HW-specific + * stats or called from the corresponding driver callback. + * + * Return: the number of stats/stringsets. 
+ */ +int libeth_ethtool_get_sset_count(struct net_device *dev, int sset) +{ + if (sset != ETH_SS_STATS) + return 0; + + return libeth_stats_get_sset_count(dev); +} +EXPORT_SYMBOL_NS_GPL(libeth_ethtool_get_sset_count, LIBETH); + +/** + * libeth_ethtool_get_strings - get libeth generic stats strings/names + * @dev: libeth-driven &net_device + * @sset: ``ETH_SS_STATS`` only, for compatibility with Ethtool callbacks + * @data: container to fill with the stats names + * + * Can be used directly in ðtool_ops if the driver doesn't have HW-specific + * stats or called from the corresponding driver callback. + * Note that the function doesn't advance the @data pointer, so it's better to + * call it at the end to avoid code complication. + */ +void libeth_ethtool_get_strings(struct net_device *dev, u32 sset, u8 *data) +{ + if (sset != ETH_SS_STATS) + return; + + libeth_stats_get_strings(dev, data); +} +EXPORT_SYMBOL_NS_GPL(libeth_ethtool_get_strings, LIBETH); + +/** + * libeth_ethtool_get_stats - get libeth generic stats counters + * @dev: libeth-driven &net_device + * @stats: unused, for compatibility with Ethtool callbacks + * @data: container to fill with the stats counters + * + * Can be used directly in ðtool_ops if the driver doesn't have HW-specific + * stats or called from the corresponding driver callback. + * Note that the function doesn't advance the @data pointer, so it's better to + * call it at the end to avoid code complication. Anyhow, the order must be the + * same as in the ::get_strings() implementation. + */ +void libeth_ethtool_get_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data) +{ + libeth_stats_get_data(dev, data); +} +EXPORT_SYMBOL_NS_GPL(libeth_ethtool_get_stats, LIBETH); + +/* Module */ + +MODULE_DESCRIPTION("Common Ethernet library"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/intel/libeth/priv.h b/drivers/net/ethernet/intel/libeth/priv.h new file mode 100644 index 000000000000..6455aab0311c --- /dev/null +++ b/drivers/net/ethernet/intel/libeth/priv.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_PRIV_H +#define __LIBETH_PRIV_H + +#include <linux/types.h> + +/* Stats */ + +struct net_device; + +bool libeth_stats_init_priv(struct net_device *dev, u32 rqs, u32 sqs, + u32 xdpsqs); +void libeth_stats_free_priv(const struct net_device *dev); + +int libeth_stats_get_sset_count(struct net_device *dev); +void libeth_stats_get_strings(struct net_device *dev, u8 *data); +void libeth_stats_get_data(struct net_device *dev, u64 *data); + +#endif /* __LIBETH_PRIV_H */ diff --git a/drivers/net/ethernet/intel/libeth/rx.c b/drivers/net/ethernet/intel/libeth/rx.c index f20926669318..d31779bbfccd 100644 --- a/drivers/net/ethernet/intel/libeth/rx.c +++ b/drivers/net/ethernet/intel/libeth/rx.c @@ -252,8 +252,3 @@ void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt) pt->hash_type |= libeth_rx_pt_xdp_pl[pt->payload_layer]; } EXPORT_SYMBOL_NS_GPL(libeth_rx_pt_gen_hash_type, LIBETH); - -/* Module */ - -MODULE_DESCRIPTION("Common Ethernet library"); -MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/intel/libeth/stats.c b/drivers/net/ethernet/intel/libeth/stats.c new file mode 100644 index 000000000000..61618aa71cbf --- /dev/null +++ b/drivers/net/ethernet/intel/libeth/stats.c @@ -0,0 +1,360 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Intel Corporation */ + +#include <linux/ethtool.h> + +#include <net/libeth/stats.h> +#include 
<net/netdev_queues.h> + +#include "priv.h" + +/* Common */ + +static void libeth_stats_sync(u64 *base, u64 *sarr, + const struct u64_stats_sync *syncp, + const u64_stats_t *raw, u32 num) +{ + u32 start; + + do { + start = u64_stats_fetch_begin(syncp); + for (u32 i = 0; i < num; i++) + sarr[i] = u64_stats_read(&raw[i]); + } while (u64_stats_fetch_retry(syncp, start)); + + for (u32 i = 0; i < num; i++) + base[i] += sarr[i]; +} + +static void __libeth_stats_get_strings(u8 **data, u32 qid, const char *pfx, + const char * const *str, u32 num) +{ + for (u32 i = 0; i < num; i++) + ethtool_sprintf(data, "%s%u_%s", pfx, qid, str[i]); +} + +/* The following barely readable compression block defines, amidst others, + * exported libeth_{rq,sq,xdpsq}_stats_{,de}init() which must be called for + * each stats container embedded in a queue structure on ifup/ifdown + * correspondingly. Note that the @qid it takes is the networking stack + * queue ID, not a driver/device internal one. + */ + +#define ___base(s) aligned_u64 s; +#define ___string(s) __stringify(s), + +#define LIBETH_STATS_DEFINE_HELPERS(pfx, PFX) \ +struct libeth_##pfx##_base_stats { \ + struct mutex lock; \ + \ + union { \ + struct { \ + LIBETH_DECLARE_##PFX##_STATS(___base); \ + }; \ + DECLARE_FLEX_ARRAY(aligned_u64, raw); \ + }; \ +}; \ +static const char * const libeth_##pfx##_stats_str[] = { \ + LIBETH_DECLARE_##PFX##_STATS(___string) \ +}; \ +static const u32 LIBETH_##PFX##_STATS_NUM = \ + ARRAY_SIZE(libeth_##pfx##_stats_str); \ + \ +static void libeth_##pfx##_stats_sync(u64 *base, \ + const struct libeth_##pfx##_stats *qs) \ +{ \ + u64 sarr[ARRAY_SIZE(libeth_##pfx##_stats_str)]; \ + \ + if (qs) \ + libeth_stats_sync(base, sarr, &qs->syncp, qs->raw, \ + LIBETH_##PFX##_STATS_NUM); \ +} \ + \ +void libeth_##pfx##_stats_init(const struct net_device *dev, \ + struct libeth_##pfx##_stats *stats, \ + u32 qid) \ +{ \ + const struct libeth_netdev_priv *priv = netdev_priv(dev); \ + \ + memset(stats, 0, sizeof(*stats)); \ + u64_stats_init(&stats->syncp); \ + \ + mutex_init(&priv->base_##pfx##s[qid].lock); \ + WRITE_ONCE(priv->live_##pfx##s[qid], stats); \ +} \ +EXPORT_SYMBOL_NS_GPL(libeth_##pfx##_stats_init, LIBETH); \ + \ +void libeth_##pfx##_stats_deinit(const struct net_device *dev, u32 qid) \ +{ \ + const struct libeth_netdev_priv *priv = netdev_priv(dev); \ + struct libeth_##pfx##_base_stats *base = &priv->base_##pfx##s[qid]; \ + \ + mutex_lock(&base->lock); \ + libeth_##pfx##_stats_sync(base->raw, \ + READ_ONCE(priv->live_##pfx##s[qid])); \ + mutex_unlock(&base->lock); \ + \ + WRITE_ONCE(priv->live_##pfx##s[qid], NULL); \ +} \ +EXPORT_SYMBOL_NS_GPL(libeth_##pfx##_stats_deinit, LIBETH); \ + \ +static void libeth_##pfx##_stats_get_strings(u8 **data, u32 num) \ +{ \ + for (u32 i = 0; i < num; i++) \ + __libeth_stats_get_strings(data, i, #pfx, \ + libeth_##pfx##_stats_str, \ + LIBETH_##PFX##_STATS_NUM); \ +} \ + \ +static void \ +__libeth_##pfx##_stats_get_data(u64 **data, \ + struct libeth_##pfx##_base_stats *base, \ + const struct libeth_##pfx##_stats *qs) \ +{ \ + mutex_lock(&base->lock); \ + memcpy(*data, base->raw, sizeof(*base)); \ + mutex_unlock(&base->lock); \ + \ + libeth_##pfx##_stats_sync(*data, qs); \ + *data += LIBETH_##PFX##_STATS_NUM; \ +} \ + \ +static void \ +libeth_##pfx##_stats_get_data(u64 **data, \ + const struct libeth_netdev_priv *priv) \ +{ \ + for (u32 i = 0; i < priv->last_##pfx##s; i++) { \ + const struct libeth_##pfx##_stats *qs; \ + \ + qs = READ_ONCE(priv->live_##pfx##s[i]); \ + 
__libeth_##pfx##_stats_get_data(data, \ + &priv->base_##pfx##s[i], \ + qs); \ + } \ +} + +LIBETH_STATS_DEFINE_HELPERS(rq, RQ); +LIBETH_STATS_DEFINE_HELPERS(sq, SQ); +LIBETH_STATS_DEFINE_HELPERS(xdpsq, XDPSQ); + +#undef ___base +#undef ___string + +/* Netlink stats. Exported fields have the same names as in the NL structs */ + +struct libeth_stats_export { + u16 li; + u16 gi; +}; + +#define LIBETH_STATS_EXPORT(lpfx, gpfx, field) { \ + .li = (offsetof(struct libeth_##lpfx##_stats, field) - \ + offsetof(struct libeth_##lpfx##_stats, raw)) / \ + sizeof_field(struct libeth_##lpfx##_stats, field), \ + .gi = offsetof(struct netdev_queue_stats_##gpfx, field) / \ + sizeof_field(struct netdev_queue_stats_##gpfx, field) \ +} +#define LIBETH_RQ_STATS_EXPORT(field) LIBETH_STATS_EXPORT(rq, rx, field) +#define LIBETH_SQ_STATS_EXPORT(field) LIBETH_STATS_EXPORT(sq, tx, field) + +static const struct libeth_stats_export libeth_rq_stats_export[] = { + LIBETH_RQ_STATS_EXPORT(bytes), + LIBETH_RQ_STATS_EXPORT(packets), + LIBETH_RQ_STATS_EXPORT(csum_unnecessary), + LIBETH_RQ_STATS_EXPORT(hw_gro_packets), + LIBETH_RQ_STATS_EXPORT(hw_gro_bytes), + LIBETH_RQ_STATS_EXPORT(alloc_fail), + LIBETH_RQ_STATS_EXPORT(csum_none), + LIBETH_RQ_STATS_EXPORT(csum_bad), +}; + +static const struct libeth_stats_export libeth_sq_stats_export[] = { + LIBETH_SQ_STATS_EXPORT(bytes), + LIBETH_SQ_STATS_EXPORT(packets), + LIBETH_SQ_STATS_EXPORT(csum_none), + LIBETH_SQ_STATS_EXPORT(needs_csum), + LIBETH_SQ_STATS_EXPORT(hw_gso_packets), + LIBETH_SQ_STATS_EXPORT(hw_gso_bytes), + LIBETH_SQ_STATS_EXPORT(stop), + LIBETH_SQ_STATS_EXPORT(wake), +}; + +#define libeth_stats_foreach_export(pfx, iter) \ + for (const struct libeth_stats_export *iter = \ + &libeth_##pfx##_stats_export[0]; \ + iter < &libeth_##pfx##_stats_export[ \ + ARRAY_SIZE(libeth_##pfx##_stats_export)]; \ + iter++) + +#define LIBETH_STATS_DEFINE_EXPORT(pfx, gpfx) \ +static void \ +libeth_get_queue_stats_##gpfx(struct net_device *dev, int idx, \ + struct netdev_queue_stats_##gpfx *stats) \ +{ \ + const struct libeth_netdev_priv *priv = netdev_priv(dev); \ + const struct libeth_##pfx##_stats *qs; \ + u64 *raw = (u64 *)stats; \ + u32 start; \ + \ + qs = READ_ONCE(priv->live_##pfx##s[idx]); \ + if (!qs) \ + return; \ + \ + do { \ + start = u64_stats_fetch_begin(&qs->syncp); \ + \ + libeth_stats_foreach_export(pfx, exp) \ + raw[exp->gi] = u64_stats_read(&qs->raw[exp->li]); \ + } while (u64_stats_fetch_retry(&qs->syncp, start)); \ +} \ + \ +static void \ +libeth_get_##pfx##_base_stats(const struct net_device *dev, \ + struct netdev_queue_stats_##gpfx *stats) \ +{ \ + const struct libeth_netdev_priv *priv = netdev_priv(dev); \ + u64 *raw = (u64 *)stats; \ + \ + memset(stats, 0, sizeof(*(stats))); \ + \ + for (u32 i = 0; i < dev->num_##gpfx##_queues; i++) { \ + struct libeth_##pfx##_base_stats *base = \ + &priv->base_##pfx##s[i]; \ + \ + mutex_lock(&base->lock); \ + \ + libeth_stats_foreach_export(pfx, exp) \ + raw[exp->gi] += base->raw[exp->li]; \ + \ + mutex_unlock(&base->lock); \ + } \ +} + +LIBETH_STATS_DEFINE_EXPORT(rq, rx); +LIBETH_STATS_DEFINE_EXPORT(sq, tx); + +static void libeth_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx) +{ + libeth_get_rq_base_stats(dev, rx); + libeth_get_sq_base_stats(dev, tx); +} + +static const struct netdev_stat_ops libeth_netdev_stat_ops = { + .get_base_stats = libeth_get_base_stats, + .get_queue_stats_rx = libeth_get_queue_stats_rx, + .get_queue_stats_tx = libeth_get_queue_stats_tx, +}; + 
+/* Ethtool: base + live */ + +int libeth_stats_get_sset_count(struct net_device *dev) +{ + struct libeth_netdev_priv *priv = netdev_priv(dev); + + priv->last_rqs = dev->real_num_rx_queues; + priv->last_sqs = dev->real_num_tx_queues; + priv->last_xdpsqs = priv->curr_xdpsqs; + + return priv->last_rqs * LIBETH_RQ_STATS_NUM + + priv->last_sqs * LIBETH_SQ_STATS_NUM + + priv->last_xdpsqs * LIBETH_XDPSQ_STATS_NUM; +} + +void libeth_stats_get_strings(struct net_device *dev, u8 *data) +{ + const struct libeth_netdev_priv *priv = netdev_priv(dev); + + libeth_rq_stats_get_strings(&data, priv->last_rqs); + libeth_sq_stats_get_strings(&data, priv->last_sqs); + libeth_xdpsq_stats_get_strings(&data, priv->last_xdpsqs); +} + +void libeth_stats_get_data(struct net_device *dev, u64 *data) +{ + struct libeth_netdev_priv *priv = netdev_priv(dev); + + libeth_rq_stats_get_data(&data, priv); + libeth_sq_stats_get_data(&data, priv); + libeth_xdpsq_stats_get_data(&data, priv); + + priv->last_rqs = 0; + priv->last_sqs = 0; + priv->last_xdpsqs = 0; +} + +/* Private init. + * All the structures getting allocated here are slowpath, so NUMA-aware + * allocation is not required for them. + */ + +bool libeth_stats_init_priv(struct net_device *dev, u32 rqs, u32 sqs, + u32 xdpsqs) +{ + struct libeth_netdev_priv *priv = netdev_priv(dev); + + priv->base_rqs = kvcalloc(rqs, sizeof(*priv->base_rqs), GFP_KERNEL); + if (!priv->base_rqs) + return false; + + priv->live_rqs = kvcalloc(rqs, sizeof(*priv->live_rqs), GFP_KERNEL); + if (!priv->live_rqs) + goto err_base_rqs; + + priv->base_sqs = kvcalloc(sqs, sizeof(*priv->base_sqs), GFP_KERNEL); + if (!priv->base_sqs) + goto err_live_rqs; + + priv->live_sqs = kvcalloc(sqs, sizeof(*priv->live_sqs), GFP_KERNEL); + if (!priv->live_sqs) + goto err_base_sqs; + + dev->stat_ops = &libeth_netdev_stat_ops; + + if (!xdpsqs) + return true; + + priv->base_xdpsqs = kvcalloc(xdpsqs, sizeof(*priv->base_xdpsqs), + GFP_KERNEL); + if (!priv->base_xdpsqs) + goto err_live_sqs; + + priv->live_xdpsqs = kvcalloc(xdpsqs, sizeof(*priv->live_xdpsqs), + GFP_KERNEL); + if (!priv->live_xdpsqs) + goto err_base_xdpsqs; + + priv->max_xdpsqs = xdpsqs; + + return true; + +err_base_xdpsqs: + kvfree(priv->base_xdpsqs); +err_live_sqs: + kvfree(priv->live_sqs); +err_base_sqs: + kvfree(priv->base_sqs); +err_live_rqs: + kvfree(priv->live_rqs); +err_base_rqs: + kvfree(priv->base_rqs); + + return false; +} + +void libeth_stats_free_priv(const struct net_device *dev) +{ + const struct libeth_netdev_priv *priv = netdev_priv(dev); + + kvfree(priv->base_rqs); + kvfree(priv->live_rqs); + kvfree(priv->base_sqs); + kvfree(priv->live_sqs); + + if (!priv->max_xdpsqs) + return; + + kvfree(priv->base_xdpsqs); + kvfree(priv->live_xdpsqs); +} diff --git a/include/net/libeth/netdev.h b/include/net/libeth/netdev.h new file mode 100644 index 000000000000..22a07f0b16d7 --- /dev/null +++ b/include/net/libeth/netdev.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_NETDEV_H +#define __LIBETH_NETDEV_H + +#include <linux/types.h> + +struct ethtool_stats; + +struct net_device *__libeth_netdev_alloc(u32 priv, u32 rqs, u32 sqs, + u32 xdpsqs); +void libeth_netdev_free(struct net_device *dev); + +int __libeth_set_real_num_queues(struct net_device *dev, u32 rqs, u32 sqs, + u32 xdpsqs); + +#define libeth_netdev_alloc(priv, rqs, sqs, ...) \ + __libeth_netdev_alloc(priv, rqs, sqs, (__VA_ARGS__ + 0)) +#define libeth_set_real_num_queues(dev, rqs, sqs, ...) 
\ + __libeth_set_real_num_queues(dev, rqs, sqs, (__VA_ARGS__ + 0)) + +/* Ethtool */ + +int libeth_ethtool_get_sset_count(struct net_device *dev, int sset); +void libeth_ethtool_get_strings(struct net_device *dev, u32 sset, u8 *data); +void libeth_ethtool_get_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data); + +#endif /* __LIBETH_NETDEV_H */ diff --git a/include/net/libeth/stats.h b/include/net/libeth/stats.h new file mode 100644 index 000000000000..fe7fed543d33 --- /dev/null +++ b/include/net/libeth/stats.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_STATS_H +#define __LIBETH_STATS_H + +#include <linux/skbuff.h> +#include <linux/unroll.h> + +#include <net/libeth/types.h> + +/* Common */ + +/** + * libeth_stats_inc_one - safely increment one stats structure counter + * @s: queue stats structure to update (&libeth_rq_stats etc.) + * @f: name of the field to increment + * + * To be used on exception or slow paths -- allocation fails, queue stops etc. + */ +#define libeth_stats_inc_one(s, f) \ + __libeth_stats_inc_one(s, f, __UNIQUE_ID(qs_)) +#define __libeth_stats_inc_one(s, f, n) do { \ + typeof(*(s)) *n = (s); \ + \ + u64_stats_update_begin(&n->syncp); \ + u64_stats_inc(&n->f); \ + u64_stats_update_end(&n->syncp); \ +} while (0) + +/** + * libeth_stats_add_frags - update the frags counter if needed + * @s: onstack stats structure to update (&libeth_rq_napi_stats etc.) + * @frags: number of frags processed + * + * Update the frags counter if @frags > 1, do nothing for non-SG frames. + */ +#define libeth_stats_add_frags(s, frags) \ + __libeth_stats_add_frags(s, frags, __UNIQUE_ID(frags_)) +#define __libeth_stats_add_frags(s, frags, uf) do { \ + u32 uf = (frags); \ + \ + if (uf > 1) \ + (s)->fragments += uf; \ +} while (0) + +#define ___libeth_stats_add(qs, ss, group, uq, us, ur) do { \ + typeof(*(qs)) *uq = (qs); \ + u64_stats_t *ur = (typeof(ur))&uq->group; \ + typeof(*(ss)) *us = (ss); \ + \ + static_assert(sizeof(uq->group) == sizeof(*us) * 2); \ + u64_stats_update_begin(&uq->syncp); \ + \ + unrolled_count(__alignof(*uq) / sizeof(*uq->raw)) \ + for (u32 i = 0; i < sizeof(*us) / sizeof(*us->raw); i++) \ + u64_stats_add(&ur[i], us->raw[i]); \ + \ + u64_stats_update_end(&uq->syncp); \ +} while (0) +#define __libeth_stats_add(qs, ss, group) \ + ___libeth_stats_add(qs, ss, group, __UNIQUE_ID(qs_), \ + __UNIQUE_ID(ss_), __UNIQUE_ID(raw_)) + +/* The following barely readable compression block defines the following + * entities to be used in drivers: + * + * &libeth_rq_napi_stats - onstack stats container for RQ NAPI polling + * libeth_rq_napi_stats_add() - add RQ onstack stats to the queue container + * &libeth_sq_napi_stats - onstack stats container for SQ completion polling + * libeth_sq_napi_stats_add() - add SQ onstack stats to the queue container + * &libeth_sq_xmit_stats - onstack stats container for ::ndo_start_xmit() + * libeth_sq_xmit_stats_add() - add SQ xmit stats to the queue container + * &libeth_xdpsq_napi_stats - onstack stats container for XDPSQ polling + * libeth_xdpsq_napi_stats_add() - add XDPSQ stats to the queue container + * + * During the NAPI poll loop or any other hot function, the "hot" counters + * get updated on the stack only. Then at the end, the corresponding _add() + * is called to quickly add them to the stats container embedded into the + * queue structure using __libeth_stats_add(). 
+ * The onstack counters are of type u32, thus it is assumed that one + * polling/sending cycle can't go above ``U32_MAX`` for any of them. + */ + +#define ___stack(s) u32 s; + +#define LIBETH_STATS_DEFINE_STACK(pfx, PFX, type, TYPE) \ +struct libeth_##pfx##_##type##_stats { \ + union { \ + struct { \ + LIBETH_DECLARE_##PFX##_##TYPE##_STATS(___stack); \ + }; \ + DECLARE_FLEX_ARRAY(u32, raw); \ + }; \ +}; \ + \ +static inline void \ +libeth_##pfx##_##type##_stats_add(struct libeth_##pfx##_stats *qs, \ + const struct libeth_##pfx##_##type##_stats \ + *ss) \ +{ \ + __libeth_stats_add(qs, ss, type); \ +} + +#define LIBETH_STATS_DECLARE_HELPERS(pfx) \ +void libeth_##pfx##_stats_init(const struct net_device *dev, \ + struct libeth_##pfx##_stats *stats, \ + u32 qid); \ +void libeth_##pfx##_stats_deinit(const struct net_device *dev, u32 qid) + +LIBETH_STATS_DEFINE_STACK(rq, RQ, napi, NAPI); +LIBETH_STATS_DECLARE_HELPERS(rq); + +LIBETH_STATS_DEFINE_STACK(sq, SQ, napi, NAPI); +LIBETH_STATS_DEFINE_STACK(sq, SQ, xmit, XMIT); + +/** + * libeth_sq_xmit_stats_csum - convert skb csum status to the SQ xmit stats + * @ss: onstack SQ xmit stats to increment + * @skb: &sk_buff to process stats for + * + * To be called from ::ndo_start_xmit() to account whether checksum offload + * was enabled when sending this @skb. + */ +static inline void libeth_sq_xmit_stats_csum(struct libeth_sq_xmit_stats *ss, + const struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_PARTIAL) + ss->needs_csum++; + else + ss->csum_none++; +} + +LIBETH_STATS_DECLARE_HELPERS(sq); + +LIBETH_STATS_DEFINE_STACK(xdpsq, XDPSQ, napi, NAPI); +LIBETH_STATS_DECLARE_HELPERS(xdpsq); + +#undef ___stack + +#endif /* __LIBETH_STATS_H */ diff --git a/include/net/libeth/types.h b/include/net/libeth/types.h new file mode 100644 index 000000000000..4ebb81621d1e --- /dev/null +++ b/include/net/libeth/types.h @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_TYPES_H +#define __LIBETH_TYPES_H + +#include <linux/u64_stats_sync.h> + +/** + * struct libeth_netdev_priv - libeth netdev private structure + * @curr_xdpsqs: current number of XDPSQs in use + * @max_xdpsqs: maximum number of XDPSQs this netdev has + * @last_rqs: number of RQs last time Ethtool stats were requested + * @last_sqs: number of SQs last time Ethtool stats were requested + * @last_xdpsqs: number of XDPSQ last time Ethtool stats were requested + * @base_rqs: per-queue RQ stats containers with the netdev lifetime + * @base_sqs: per-queue SQ stats containers with the netdev lifetime + * @base_xdpsqs: per-queue XDPSQ stats containers with the netdev lifetime + * @live_rqs: pointers to the current driver's embedded RQ stats + * @live_sqs: pointers to the current driver's embedded SQ stats + * @live_xdpsqs: pointers to the current driver's embedded XDPSQ stats + * + * The structure must be placed strictly at the beginning of driver's netdev + * private structure if it uses libeth generic stats, as libeth uses + * netdev_priv() to access it. The structure is private to libeth and + * shouldn't be accessed from drivers directly. 
+ */
+struct libeth_netdev_priv {
+        u32 curr_xdpsqs;
+        u32 max_xdpsqs;
+
+        u16 last_rqs;
+        u16 last_sqs;
+        u16 last_xdpsqs;
+
+        struct libeth_rq_base_stats *base_rqs;
+        struct libeth_sq_base_stats *base_sqs;
+        struct libeth_xdpsq_base_stats *base_xdpsqs;
+
+        const struct libeth_rq_stats **live_rqs;
+        const struct libeth_sq_stats **live_sqs;
+        const struct libeth_xdpsq_stats **live_xdpsqs;
+
+        /* Driver private data, ____cacheline_aligned */
+} ____cacheline_aligned;
+
+/**
+ * libeth_netdev_priv_assert - assert the layout of the driver's netdev priv
+ * @t: typeof() of the driver's netdev private structure
+ * @f: name of the embedded &libeth_netdev_priv inside @t
+ *
+ * Make sure &libeth_netdev_priv is placed strictly at the beginning of the
+ * driver's private structure, so that libeth can use netdev_priv() to
+ * access it.
+ * To be called right after the driver's netdev private struct declaration.
+ */
+#define libeth_netdev_priv_assert(t, f) \
+        static_assert(__same_type(struct libeth_netdev_priv, \
+                                  typeof_member(t, f)) && !offsetof(t, f))
+
+/* Stats. '[NL]' means it's exported to the Netlink per-queue stats */
+
+/* Use 32-byte alignment to reduce false sharing. The first ~4 fields are
+ * usually the hottest and the stats update helpers are unrolled by this count.
+ */
+#define __libeth_stats_aligned \
+        __aligned(__cmp(min, 4 * sizeof(u64_stats_t), SMP_CACHE_BYTES))
+
+/* Align queue stats counters naturally in case they aren't */
+#define __libeth_u64_stats_t \
+        u64_stats_t __aligned(sizeof(u64_stats_t))
+
+#define ___live(s) __libeth_u64_stats_t s;
+
+/* Rx per-queue stats:
+ *
+ * napi: "hot" counters, updated in bulk from NAPI polling loops:
+ * bytes: bytes received on this queue [NL]
+ * packets: packets received on this queue [NL]
+ * fragments: number of processed descriptors carrying only a fragment
+ * csum_unnecessary: number of frames the device checked the checksum for [NL]
+ * hsplit: number of frames the device performed the header split for
+ * hsplit_linear: number of frames placed entirely into the header buffer
+ * hw_gro_packets: number of frames the device did HW GRO for [NL]
+ * hw_gro_bytes: bytes for all HW GROed frames [NL]
+ *
+ * fail: "slow"/error counters, incremented by one when they occur:
+ * alloc_fail: number of FQE (Rx buffer) allocation failures [NL]
+ * dma_errs: number of hardware Rx DMA errors
+ * csum_none: number of frames the device didn't check the checksum for [NL]
+ * csum_bad: number of frames with an invalid checksum [NL]
+ * hsplit_errs: number of header split errors (header buffer overflows etc.)
+ * build_fail: number of napi_build_skb() failures
+ *
+ * &libeth_rq_stats must be embedded into the corresponding queue structure.
+ */
+
+#define LIBETH_DECLARE_RQ_NAPI_STATS(act) \
+        act(bytes) \
+        act(packets) \
+        act(fragments) \
+        act(csum_unnecessary) \
+        act(hsplit) \
+        act(hsplit_linear) \
+        act(hw_gro_packets) \
+        act(hw_gro_bytes)
+
+#define LIBETH_DECLARE_RQ_FAIL_STATS(act) \
+        act(alloc_fail) \
+        act(dma_errs) \
+        act(csum_none) \
+        act(csum_bad) \
+        act(hsplit_errs) \
+        act(build_fail)
+
+#define LIBETH_DECLARE_RQ_STATS(act) \
+        LIBETH_DECLARE_RQ_NAPI_STATS(act) \
+        LIBETH_DECLARE_RQ_FAIL_STATS(act)
+
+struct libeth_rq_stats {
+        struct u64_stats_sync syncp;
+
+        union {
+                struct {
+                        struct_group(napi,
+                                LIBETH_DECLARE_RQ_NAPI_STATS(___live);
+                        );
+                        LIBETH_DECLARE_RQ_FAIL_STATS(___live);
+                };
+                DECLARE_FLEX_ARRAY(__libeth_u64_stats_t, raw);
+        };
+} __libeth_stats_aligned;
+
+/* Tx per-queue stats:
+ *
+ * napi: "hot" counters, updated in bulk from NAPI polling loops:
+ * bytes: bytes sent from this queue [NL]
+ * packets: packets sent from this queue [NL]
+ *
+ * xmit: "hot" counters, updated in bulk from ::ndo_start_xmit():
+ * fragments: number of descriptors carrying only a fragment
+ * csum_none: number of frames sent w/o checksum offload [NL]
+ * needs_csum: number of frames sent with checksum offload [NL]
+ * hw_gso_packets: number of frames sent with segmentation offload [NL]
+ * tso: number of frames sent with TCP segmentation offload
+ * uso: number of frames sent with UDP L4 segmentation offload
+ * hw_gso_bytes: total bytes for HW GSOed frames [NL]
+ *
+ * fail: "slow"/error counters, incremented by one when they occur:
+ * linearized: number of non-linear skbs linearized due to HW limits
+ * dma_map_errs: number of DMA mapping errors
+ * drops: number of skbs dropped by ::ndo_start_xmit()
+ * busy: number of xmit failures due to the queue being full
+ * stop: number of times the queue was stopped by the driver [NL]
+ * wake: number of times the queue was started after being stopped [NL]
+ *
+ * &libeth_sq_stats must be embedded into the corresponding queue structure.
+ */
+
+#define LIBETH_DECLARE_SQ_NAPI_STATS(act) \
+        act(bytes) \
+        act(packets)
+
+#define LIBETH_DECLARE_SQ_XMIT_STATS(act) \
+        act(fragments) \
+        act(csum_none) \
+        act(needs_csum) \
+        act(hw_gso_packets) \
+        act(tso) \
+        act(uso) \
+        act(hw_gso_bytes)
+
+#define LIBETH_DECLARE_SQ_FAIL_STATS(act) \
+        act(linearized) \
+        act(dma_map_errs) \
+        act(drops) \
+        act(busy) \
+        act(stop) \
+        act(wake)
+
+#define LIBETH_DECLARE_SQ_STATS(act) \
+        LIBETH_DECLARE_SQ_NAPI_STATS(act) \
+        LIBETH_DECLARE_SQ_XMIT_STATS(act) \
+        LIBETH_DECLARE_SQ_FAIL_STATS(act)
+
+struct libeth_sq_stats {
+        struct u64_stats_sync syncp;
+
+        union {
+                struct {
+                        struct_group(napi,
+                                LIBETH_DECLARE_SQ_NAPI_STATS(___live);
+                        );
+                        struct_group(xmit,
+                                LIBETH_DECLARE_SQ_XMIT_STATS(___live);
+                        );
+                        LIBETH_DECLARE_SQ_FAIL_STATS(___live);
+                };
+                DECLARE_FLEX_ARRAY(__libeth_u64_stats_t, raw);
+        };
+} __libeth_stats_aligned;
+
+/* XDP Tx per-queue stats:
+ *
+ * napi: "hot" counters, updated in bulk from NAPI polling loops:
+ * bytes: bytes sent from this queue
+ * packets: packets sent from this queue
+ * fragments: number of descriptors carrying only a fragment
+ *
+ * fail: "slow"/error counters, incremented by one when they occur:
+ * dma_map_errs: number of DMA mapping errors
+ * drops: number of frags dropped due to the queue being full
+ * busy: number of xmit failures due to the queue being full
+ *
+ * &libeth_xdpsq_stats must be embedded into the corresponding queue structure.
+ */
+
+#define LIBETH_DECLARE_XDPSQ_NAPI_STATS(act) \
+        LIBETH_DECLARE_SQ_NAPI_STATS(act) \
+        act(fragments)
+
+#define LIBETH_DECLARE_XDPSQ_FAIL_STATS(act) \
+        act(dma_map_errs) \
+        act(drops) \
+        act(busy)
+
+#define LIBETH_DECLARE_XDPSQ_STATS(act) \
+        LIBETH_DECLARE_XDPSQ_NAPI_STATS(act) \
+        LIBETH_DECLARE_XDPSQ_FAIL_STATS(act)
+
+struct libeth_xdpsq_stats {
+        struct u64_stats_sync syncp;
+
+        union {
+                struct {
+                        struct_group(napi,
+                                LIBETH_DECLARE_XDPSQ_NAPI_STATS(___live);
+                        );
+                        LIBETH_DECLARE_XDPSQ_FAIL_STATS(___live);
+                };
+                DECLARE_FLEX_ARRAY(__libeth_u64_stats_t, raw);
+        };
+} __libeth_stats_aligned;
+
+#undef ___live
+
+#endif /* __LIBETH_TYPES_H */
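
For context on how the pieces above are meant to be wired together: per the kernel-doc in types.h, a driver embeds &libeth_netdev_priv at offset 0 of its own netdev private struct and checks the layout with libeth_netdev_priv_assert(). A minimal sketch -- the my_* names below are illustrative, not part of the patch:

/* hypothetical driver-private struct; only the libeth member is real API */
struct my_netdev_priv {
        /* must be the very first member, see the libeth_netdev_priv kdoc */
        struct libeth_netdev_priv base;

        /* driver-specific fields follow */
        u32 some_flags;
};
/* compile-time check that @base really sits at offset 0 */
libeth_netdev_priv_assert(struct my_netdev_priv, base);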
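
The Rx hot-path pattern the stats.h comment describes would then look roughly like the sketch below, assuming a hypothetical struct my_rx_queue that embeds &libeth_rq_stats and hypothetical my_rq_build_skb()/my_rq_napi() helpers: counters are accumulated on the stack during the poll and folded into the queue container once per NAPI cycle, while exception counters are bumped directly with libeth_stats_inc_one():

struct my_rx_queue {
        /* descriptor ring, fill queue, ... */
        struct libeth_rq_stats stats;
};

static int my_rq_poll(struct my_rx_queue *rq, int budget)
{
        struct libeth_rq_napi_stats rs = { };
        int done = 0;

        while (done < budget) {
                struct sk_buff *skb;
                u32 frags = 1;

                /* my_rq_build_skb() is a hypothetical driver helper */
                skb = my_rq_build_skb(rq, &frags);
                if (!skb) {
                        /* exception path: one locked update per event */
                        libeth_stats_inc_one(&rq->stats, build_fail);
                        break;
                }

                rs.packets++;
                rs.bytes += skb->len;
                libeth_stats_add_frags(&rs, frags); /* no-op for frags <= 1 */

                napi_gro_receive(my_rq_napi(rq), skb); /* hypothetical lookup */
                done++;
        }

        /* single u64_stats-protected add for the whole poll cycle */
        libeth_rq_napi_stats_add(&rq->stats, &rs);

        return done;
}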
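
Similarly for Tx, ::ndo_start_xmit() would fill a &libeth_sq_xmit_stats on the stack (libeth_sq_xmit_stats_csum() covers the checksum counters) and flush it with libeth_sq_xmit_stats_add(). Again a sketch with hypothetical driver names (my_skb_to_sq(), my_sq_space(), MY_MAX_DESCS_PER_SKB):

struct my_tx_queue {
        /* descriptor ring, ... */
        struct libeth_sq_stats stats;
};

static netdev_tx_t my_ndo_start_xmit(struct sk_buff *skb,
                                     struct net_device *dev)
{
        struct libeth_sq_xmit_stats ss = { };
        struct my_tx_queue *sq = my_skb_to_sq(dev, skb); /* hypothetical */

        if (my_sq_space(sq) < MY_MAX_DESCS_PER_SKB) {    /* hypothetical */
                libeth_stats_inc_one(&sq->stats, busy);
                return NETDEV_TX_BUSY;
        }

        libeth_sq_xmit_stats_csum(&ss, skb);
        /* counts SG frames only, see libeth_stats_add_frags() kdoc */
        libeth_stats_add_frags(&ss, skb_shinfo(skb)->nr_frags + 1);

        /* ... map and post the descriptors ... */

        libeth_sq_xmit_stats_add(&sq->stats, &ss);

        return NETDEV_TX_OK;
}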