diff mbox series

[net-next,v3,6/8] igc: Add support for tuning frame preemption via ethtool

Message ID 20210122224453.4161729-7-vinicius.gomes@intel.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series ethtool: Add support for frame preemption | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 2 maintainers not CCed: jesse.brandeburg@intel.com davem@davemloft.net
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 7 this patch: 7
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 195 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 7 this patch: 7
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Vinicius Costa Gomes Jan. 22, 2021, 10:44 p.m. UTC
The tc subsystem sets which queues are marked as preemptible, it's the
role of ethtool to control more hardware specific parameters. These
parameters include:

 - enabling the frame preemption hardware: As enabling frame
 preemption may have other requirements before it can be enabled, it's
 exposed via the ethtool API;

 - mininum fragment size multiplier: expressed in usually in the form
 of (1 + N)*64, this number indicates what's the size of the minimum
 fragment that can be preempted.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         | 12 +++++
 drivers/net/ethernet/intel/igc/igc_defines.h |  4 ++
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 53 ++++++++++++++++++++
 drivers/net/ethernet/intel/igc/igc_tsn.c     | 25 +++++++--
 4 files changed, 91 insertions(+), 3 deletions(-)

Comments

Vladimir Oltean Jan. 26, 2021, 12:32 a.m. UTC | #1
On Fri, Jan 22, 2021 at 02:44:51PM -0800, Vinicius Costa Gomes wrote:
> The tc subsystem sets which queues are marked as preemptible, it's the
> role of ethtool to control more hardware specific parameters. These
> parameters include:
> 
>  - enabling the frame preemption hardware: As enabling frame
>  preemption may have other requirements before it can be enabled, it's
>  exposed via the ethtool API;
> 
>  - mininum fragment size multiplier: expressed in usually in the form
>  of (1 + N)*64, this number indicates what's the size of the minimum
>  fragment that can be preempted.

And not one word has been said about the patch...

> 
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
> ---
>  drivers/net/ethernet/intel/igc/igc.h         | 12 +++++
>  drivers/net/ethernet/intel/igc/igc_defines.h |  4 ++
>  drivers/net/ethernet/intel/igc/igc_ethtool.c | 53 ++++++++++++++++++++
>  drivers/net/ethernet/intel/igc/igc_tsn.c     | 25 +++++++--
>  4 files changed, 91 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
> index 35baae900c1f..1067c46e0bc2 100644
> --- a/drivers/net/ethernet/intel/igc/igc.h
> +++ b/drivers/net/ethernet/intel/igc/igc.h
> @@ -87,6 +87,7 @@ struct igc_ring {
>  	u8 queue_index;                 /* logical index of the ring*/
>  	u8 reg_idx;                     /* physical index of the ring */
>  	bool launchtime_enable;         /* true if LaunchTime is enabled */
> +	bool preemptible;		/* true if not express */

Mixing tabs and spaces?

> +static int igc_ethtool_set_preempt(struct net_device *netdev,
> +				   struct ethtool_fp *fpcmd,
> +				   struct netlink_ext_ack *extack)
> +{
> +	struct igc_adapter *adapter = netdev_priv(netdev);
> +	int i;
> +
> +	if (fpcmd->add_frag_size < 68 || fpcmd->add_frag_size > 260) {
> +		NL_SET_ERR_MSG_MOD(extack, "Invalid value for add-frag-size");
> +		return -EINVAL;
> +	}

This check should belong in ethtool, since there's nothing unusual about
this supported range.

Also, I believe that Jakub requested the min-frag-size to be passed as
0, 1, 2, 3 as the standard specifies it, and not its multiplied version?

> +
> +	adapter->frame_preemption_active = fpcmd->enabled;
> +	adapter->add_frag_size = fpcmd->add_frag_size;
> +
> +	if (!adapter->frame_preemption_active)
> +		goto done;
> +
> +	/* Enabling frame preemption requires TSN mode to be enabled,
> +	 * which requires a schedule to be active. So, if there isn't
> +	 * a schedule already configured, configure a simple one, with
> +	 * all queues open, with 1ms cycle time.
> +	 */
> +	if (adapter->base_time)
> +		goto done;

Unless I'm missing something, you are interpreting an adapter->base_time
value of zero as "no Qbv schedule on port", as if it was invalid to have
a base-time of zero, which it isn't.

> @@ -115,6 +130,9 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
>  		if (ring->launchtime_enable)
>  			txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
>  
> +		if (ring->preemptible)
> +			txqctl |= IGC_TXQCTL_PREEMPTABLE;

I think this is the only place in the series where you use PREEMPTABLE
instead of PREEMPTIBLE.

> +
>  		wr32(IGC_TXQCTL(i), txqctl);
>  	}

Out of curiosity, where is the ring to traffic class mapping configured
in the igc driver? I suppose that you have more rings than traffic classes.
Vinicius Costa Gomes Jan. 29, 2021, 9:27 p.m. UTC | #2
Vladimir Oltean <vladimir.oltean@nxp.com> writes:

> On Fri, Jan 22, 2021 at 02:44:51PM -0800, Vinicius Costa Gomes wrote:
>> The tc subsystem sets which queues are marked as preemptible, it's the
>> role of ethtool to control more hardware specific parameters. These
>> parameters include:
>> 
>>  - enabling the frame preemption hardware: As enabling frame
>>  preemption may have other requirements before it can be enabled, it's
>>  exposed via the ethtool API;
>> 
>>  - mininum fragment size multiplier: expressed in usually in the form
>>  of (1 + N)*64, this number indicates what's the size of the minimum
>>  fragment that can be preempted.
>
> And not one word has been said about the patch...

If I am undertanding this right. Will fix the commit message.

>
>> 
>> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
>> ---
>>  drivers/net/ethernet/intel/igc/igc.h         | 12 +++++
>>  drivers/net/ethernet/intel/igc/igc_defines.h |  4 ++
>>  drivers/net/ethernet/intel/igc/igc_ethtool.c | 53 ++++++++++++++++++++
>>  drivers/net/ethernet/intel/igc/igc_tsn.c     | 25 +++++++--
>>  4 files changed, 91 insertions(+), 3 deletions(-)
>> 
>> diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
>> index 35baae900c1f..1067c46e0bc2 100644
>> --- a/drivers/net/ethernet/intel/igc/igc.h
>> +++ b/drivers/net/ethernet/intel/igc/igc.h
>> @@ -87,6 +87,7 @@ struct igc_ring {
>>  	u8 queue_index;                 /* logical index of the ring*/
>>  	u8 reg_idx;                     /* physical index of the ring */
>>  	bool launchtime_enable;         /* true if LaunchTime is enabled */
>> +	bool preemptible;		/* true if not express */
>
> Mixing tabs and spaces?

Ugh. Will fix. Thanks.

>
>> +static int igc_ethtool_set_preempt(struct net_device *netdev,
>> +				   struct ethtool_fp *fpcmd,
>> +				   struct netlink_ext_ack *extack)
>> +{
>> +	struct igc_adapter *adapter = netdev_priv(netdev);
>> +	int i;
>> +
>> +	if (fpcmd->add_frag_size < 68 || fpcmd->add_frag_size > 260) {
>> +		NL_SET_ERR_MSG_MOD(extack, "Invalid value for add-frag-size");
>> +		return -EINVAL;
>> +	}
>
> This check should belong in ethtool, since there's nothing unusual about
> this supported range.
>
> Also, I believe that Jakub requested the min-frag-size to be passed as
> 0, 1, 2, 3 as the standard specifies it, and not its multiplied
> version?

Later, Michal Kubechek suggested using the multiplied value, to be
future proof and less dependent on some specific standard version.

>
>> +
>> +	adapter->frame_preemption_active = fpcmd->enabled;
>> +	adapter->add_frag_size = fpcmd->add_frag_size;
>> +
>> +	if (!adapter->frame_preemption_active)
>> +		goto done;
>> +
>> +	/* Enabling frame preemption requires TSN mode to be enabled,
>> +	 * which requires a schedule to be active. So, if there isn't
>> +	 * a schedule already configured, configure a simple one, with
>> +	 * all queues open, with 1ms cycle time.
>> +	 */
>> +	if (adapter->base_time)
>> +		goto done;
>
> Unless I'm missing something, you are interpreting an adapter->base_time
> value of zero as "no Qbv schedule on port", as if it was invalid to have
> a base-time of zero, which it isn't.

This HW has specific limitations, it doesn't allow a base_time in the
past. So a base_time of zero can be used to signify "No Qbv".

>
>> @@ -115,6 +130,9 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
>>  		if (ring->launchtime_enable)
>>  			txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
>>  
>> +		if (ring->preemptible)
>> +			txqctl |= IGC_TXQCTL_PREEMPTABLE;
>
> I think this is the only place in the series where you use PREEMPTABLE
> instead of PREEMPTIBLE.

Yeah, on the datasheet it's written PREEMPTABLE, I chose to use this
spelling to make it easier to search for this bit in the datasheet.

>
>> +
>>  		wr32(IGC_TXQCTL(i), txqctl);
>>  	}
>
> Out of curiosity, where is the ring to traffic class mapping configured
> in the igc driver? I suppose that you have more rings than traffic classes.

The driver follows the default behaviour, that netdev->queue[0] maps to
ring[0], queue[1] to ring[1], and so on. And by default ring[0] has
higher priority than ring[1], ring[1] higher than ring[2], and so on.

The HW only has 4 rings/queues.


Cheers,
Vladimir Oltean Jan. 29, 2021, 11:16 p.m. UTC | #3
On Fri, Jan 29, 2021 at 01:27:28PM -0800, Vinicius Costa Gomes wrote:
> >> +static int igc_ethtool_set_preempt(struct net_device *netdev,
> >> +				   struct ethtool_fp *fpcmd,
> >> +				   struct netlink_ext_ack *extack)
> >> +{
> >> +	struct igc_adapter *adapter = netdev_priv(netdev);
> >> +	int i;
> >> +
> >> +	if (fpcmd->add_frag_size < 68 || fpcmd->add_frag_size > 260) {
> >> +		NL_SET_ERR_MSG_MOD(extack, "Invalid value for add-frag-size");
> >> +		return -EINVAL;
> >> +	}
> >
> > This check should belong in ethtool, since there's nothing unusual about
> > this supported range.
> >
> > Also, I believe that Jakub requested the min-frag-size to be passed as
> > 0, 1, 2, 3 as the standard specifies it, and not its multiplied
> > version?
> 
> Later, Michal Kubechek suggested using the multiplied value, to be
> future proof and less dependent on some specific standard version.

Imagine you're adding frame preemption to some LLDP agent and you want
to pass that data to the kernel. Case (a) you read the value as {0, 1,
2, 3} and you pass it to the kernel as {0, 1, 2, 3}. Case (b) you read
the value as {0, 1, 2, 3}, you prove to the kernel that you're a smart
boy and you know the times 64 multiplication table minus 4, and if you
pass the exam, the kernel calls ethtool_frag_size_to_mult again, to
retrieve the {0, 1, 2, 3} form which it'll use to program the hardware
with (oh and btw, ethtool_frag_size_to_mult allows any value to be
passed in, so it tricks users into thinking that any other value except
60, 124, 188, 252 might have a different effect, cause them to wonder
what is 123 even rounded to, etc).
And halfway through writing the user space code for case (b), you're
thinking "good thing I'm making this LLDP TLV more future-proof by
multiplying it with 64..."

Also, so shady this specific standard called IEEE 802.3-2018.....
Frame preemption is past draft status, it's safe to say it won't change
in backwards-incompatible ways, this isn't Python. If anything, we'll
give them another reason not to.

> >> +	adapter->frame_preemption_active = fpcmd->enabled;
> >> +	adapter->add_frag_size = fpcmd->add_frag_size;
> >> +
> >> +	if (!adapter->frame_preemption_active)
> >> +		goto done;
> >> +
> >> +	/* Enabling frame preemption requires TSN mode to be enabled,
> >> +	 * which requires a schedule to be active. So, if there isn't
> >> +	 * a schedule already configured, configure a simple one, with
> >> +	 * all queues open, with 1ms cycle time.
> >> +	 */
> >> +	if (adapter->base_time)
> >> +		goto done;
> >
> > Unless I'm missing something, you are interpreting an adapter->base_time
> > value of zero as "no Qbv schedule on port", as if it was invalid to have
> > a base-time of zero, which it isn't.
> 
> This HW has specific limitations, it doesn't allow a base_time in the
> past. So a base_time of zero can be used to signify "No Qbv".

Oh and by past you mean future?

	/* If we program the controller's BASET registers with a time
	 * in the future, it will hold all the packets until that
	 * time, causing a lot of TX Hangs, so to avoid that, we
	 * reject schedules that would start in the future.
	 */
	if (!is_base_time_past(qopt->base_time, &now))
		return false;

Buggy hardware notwithstanding, but you wrote in "man 8 tc-taprio" that

       base-time
              Specifies the instant in nanoseconds, using the reference
              of clockid, defining the time when the schedule starts. If
              'base-time' is a time in the past, the schedule will start
              at

              base-time + (N * cycle-time)

              where N is the smallest integer so the resulting time is
              greater than "now", and "cycle-time" is the sum of all the
              intervals of the entries in the schedule

Does that not apply to schedules offloaded on Intel hardware?
You're okay with any base-time in the past (your hardware basically
requires them) but the base-time of zero is somehow special and not
valid because?

> > Out of curiosity, where is the ring to traffic class mapping configured
> > in the igc driver? I suppose that you have more rings than traffic classes.
> 
> The driver follows the default behaviour, that netdev->queue[0] maps to
> ring[0], queue[1] to ring[1], and so on. And by default ring[0] has
> higher priority than ring[1], ring[1] higher than ring[2], and so on.
> 
> The HW only has 4 rings/queues.

I meant to ask: is the priority of rings 0, 1, 2, 3 configurable? If so,
where is it configured by the driver? I want to understand better the
world that you're coming from, with this whole "preemptable rings"
instead of "preemptable traffic classes" thing.
IEEE 802.1Q-2018 clause 12.30.1.1.1 framePreemptionAdminStatus talks
about reporting preemption status per priority/traffic class, not per
queue/ring. Granted, I may be trapped in my own strange world here, but
say a driver has 16 rings mapped to 8 priorities like enetc does, I
think it's super odd that taprio calls tc_map_to_queue_mask before
passing the gate_mask to ndo_setup_tc. It's the kind of thing that makes
you wonder if you should put some sort of paranoid conflict resolution
checks in the code, what if queues 0 and 1, which have the same
priority, have different values in the gate_mask, what do I program into
my hardware which operates per priority as the standard actually says?
Vladimir Oltean March 3, 2021, 1:07 a.m. UTC | #4
On Fri, Jan 22, 2021 at 02:44:51PM -0800, Vinicius Costa Gomes wrote:
> diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
> index 0e78abfd99ee..c2155d109bd6 100644
> --- a/drivers/net/ethernet/intel/igc/igc_defines.h
> +++ b/drivers/net/ethernet/intel/igc/igc_defines.h
> @@ -410,10 +410,14 @@
>  /* Transmit Scheduling */
>  #define IGC_TQAVCTRL_TRANSMIT_MODE_TSN	0x00000001
>  #define IGC_TQAVCTRL_ENHANCED_QAV	0x00000008
> +#define IGC_TQAVCTRL_PREEMPT_ENA	0x00000002
> +#define IGC_TQAVCTRL_MIN_FRAG_MASK	0x0000C000
> +#define IGC_TQAVCTRL_MIN_FRAG_SHIFT	14
> @@ -83,13 +89,22 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
>  	wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN);
>  	wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN);
>  
> -	tqavctrl = rd32(IGC_TQAVCTRL);
>  	rxpbs = rd32(IGC_RXPBS) & ~IGC_RXPBSIZE_SIZE_MASK;
>  	rxpbs |= IGC_RXPBSIZE_TSN;
>  
>  	wr32(IGC_RXPBS, rxpbs);
>  
> +	tqavctrl = rd32(IGC_TQAVCTRL) &
> +		~(IGC_TQAVCTRL_MIN_FRAG_MASK | IGC_TQAVCTRL_PREEMPT_ENA);
>  	tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV;
> +
> +	if (adapter->frame_preemption_active)
> +		tqavctrl |= IGC_TQAVCTRL_PREEMPT_ENA;

Question: when adapter->frame_preemption_active is false, does the port
have the pMAC enabled, and can it receive preemptable frames? Maybe we
should be very explicit that the ethtool frame preemption only configures
the egress side, and that a driver capable of FP should always turn on
the pMAC on RX. Are you aware of any performance downsides?

> +
> +	frag_size_mult = ethtool_frag_size_to_mult(adapter->add_frag_size);
> +
> +	tqavctrl |= frag_size_mult << IGC_TQAVCTRL_MIN_FRAG_SHIFT;
> +
>  	wr32(IGC_TQAVCTRL, tqavctrl);
>  
>  	wr32(IGC_QBVCYCLET_S, cycle);
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 35baae900c1f..1067c46e0bc2 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -87,6 +87,7 @@  struct igc_ring {
 	u8 queue_index;                 /* logical index of the ring*/
 	u8 reg_idx;                     /* physical index of the ring */
 	bool launchtime_enable;         /* true if LaunchTime is enabled */
+	bool preemptible;		/* true if not express */
 
 	u32 start_time;
 	u32 end_time;
@@ -165,6 +166,8 @@  struct igc_adapter {
 
 	ktime_t base_time;
 	ktime_t cycle_time;
+	bool frame_preemption_active;
+	u8 add_frag_size;
 
 	/* OS defined structs */
 	struct pci_dev *pdev;
@@ -262,6 +265,10 @@  extern char igc_driver_name[];
 #define IGC_FLAG_VLAN_PROMISC		BIT(15)
 #define IGC_FLAG_RX_LEGACY		BIT(16)
 #define IGC_FLAG_TSN_QBV_ENABLED	BIT(17)
+#define IGC_FLAG_TSN_PREEMPT_ENABLED	BIT(18)
+
+#define IGC_FLAG_TSN_ANY_ENABLED \
+	(IGC_FLAG_TSN_QBV_ENABLED | IGC_FLAG_TSN_PREEMPT_ENABLED)
 
 #define IGC_FLAG_RSS_FIELD_IPV4_UDP	BIT(6)
 #define IGC_FLAG_RSS_FIELD_IPV6_UDP	BIT(7)
@@ -310,6 +317,11 @@  extern char igc_driver_name[];
 #define IGC_I225_RX_LATENCY_1000	300
 #define IGC_I225_RX_LATENCY_2500	1485
 
+/* From the datasheet section 8.12.4 Tx Qav Control TQAVCTRL,
+ * MIN_FRAG initial value.
+ */
+#define IGC_I225_MIN_FRAG_SIZE_DEFAULT	68
+
 /* RX and TX descriptor control thresholds.
  * PTHRESH - MAC will consider prefetch if it has fewer than this number of
  *           descriptors available in its onboard memory.
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 0e78abfd99ee..c2155d109bd6 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -410,10 +410,14 @@ 
 /* Transmit Scheduling */
 #define IGC_TQAVCTRL_TRANSMIT_MODE_TSN	0x00000001
 #define IGC_TQAVCTRL_ENHANCED_QAV	0x00000008
+#define IGC_TQAVCTRL_PREEMPT_ENA	0x00000002
+#define IGC_TQAVCTRL_MIN_FRAG_MASK	0x0000C000
+#define IGC_TQAVCTRL_MIN_FRAG_SHIFT	14
 
 #define IGC_TXQCTL_QUEUE_MODE_LAUNCHT	0x00000001
 #define IGC_TXQCTL_STRICT_CYCLE		0x00000002
 #define IGC_TXQCTL_STRICT_END		0x00000004
+#define IGC_TXQCTL_PREEMPTABLE		0x00000008
 
 /* Receive Checksum Control */
 #define IGC_RXCSUM_CRCOFL	0x00000800   /* CRC32 offload enable */
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 61d331ce38cd..e71edbbe08b4 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -8,6 +8,7 @@ 
 
 #include "igc.h"
 #include "igc_diag.h"
+#include "igc_tsn.h"
 
 /* forward declaration */
 struct igc_stats {
@@ -1636,6 +1637,56 @@  static int igc_ethtool_set_eee(struct net_device *netdev,
 	return 0;
 }
 
+static int igc_ethtool_get_preempt(struct net_device *netdev,
+				   struct ethtool_fp *fpcmd)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	fpcmd->enabled = adapter->frame_preemption_active;
+	fpcmd->add_frag_size = adapter->add_frag_size;
+
+	return 0;
+}
+
+static int igc_ethtool_set_preempt(struct net_device *netdev,
+				   struct ethtool_fp *fpcmd,
+				   struct netlink_ext_ack *extack)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	if (fpcmd->add_frag_size < 68 || fpcmd->add_frag_size > 260) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid value for add-frag-size");
+		return -EINVAL;
+	}
+
+	adapter->frame_preemption_active = fpcmd->enabled;
+	adapter->add_frag_size = fpcmd->add_frag_size;
+
+	if (!adapter->frame_preemption_active)
+		goto done;
+
+	/* Enabling frame preemption requires TSN mode to be enabled,
+	 * which requires a schedule to be active. So, if there isn't
+	 * a schedule already configured, configure a simple one, with
+	 * all queues open, with 1ms cycle time.
+	 */
+	if (adapter->base_time)
+		goto done;
+
+	adapter->cycle_time = NSEC_PER_MSEC;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+
+		ring->start_time = 0;
+		ring->end_time = NSEC_PER_MSEC;
+	}
+
+done:
+	return igc_tsn_offload_apply(adapter);
+}
+
 static int igc_ethtool_begin(struct net_device *netdev)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
@@ -1915,6 +1966,8 @@  static const struct ethtool_ops igc_ethtool_ops = {
 	.get_ts_info		= igc_ethtool_get_ts_info,
 	.get_channels		= igc_ethtool_get_channels,
 	.set_channels		= igc_ethtool_set_channels,
+	.get_preempt		= igc_ethtool_get_preempt,
+	.set_preempt		= igc_ethtool_set_preempt,
 	.get_priv_flags		= igc_ethtool_get_priv_flags,
 	.set_priv_flags		= igc_ethtool_set_priv_flags,
 	.get_eee		= igc_ethtool_get_eee,
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
index f5a5527adb21..31aa9eed3ae3 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
@@ -30,7 +30,10 @@  static int igc_tsn_disable_offload(struct igc_adapter *adapter)
 	if (!(adapter->flags & IGC_FLAG_TSN_QBV_ENABLED))
 		return 0;
 
+	adapter->base_time = 0;
 	adapter->cycle_time = 0;
+	adapter->frame_preemption_active = false;
+	adapter->add_frag_size = IGC_I225_MIN_FRAG_SIZE_DEFAULT;
 
 	wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
 	wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT);
@@ -42,7 +45,8 @@  static int igc_tsn_disable_offload(struct igc_adapter *adapter)
 
 	tqavctrl = rd32(IGC_TQAVCTRL);
 	tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN |
-		      IGC_TQAVCTRL_ENHANCED_QAV);
+		      IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_PREEMPT_ENA |
+		      IGC_TQAVCTRL_MIN_FRAG_MASK);
 	wr32(IGC_TQAVCTRL, tqavctrl);
 
 	for (i = 0; i < adapter->num_tx_queues; i++) {
@@ -51,6 +55,7 @@  static int igc_tsn_disable_offload(struct igc_adapter *adapter)
 		ring->start_time = 0;
 		ring->end_time = 0;
 		ring->launchtime_enable = false;
+		ring->preemptible = false;
 
 		wr32(IGC_TXQCTL(i), 0);
 		wr32(IGC_STQT(i), 0);
@@ -71,6 +76,7 @@  static int igc_tsn_enable_offload(struct igc_adapter *adapter)
 	u32 tqavctrl, baset_l, baset_h;
 	u32 sec, nsec, cycle, rxpbs;
 	ktime_t base_time, systim;
+	u8 frag_size_mult;
 	int i;
 
 	if (adapter->flags & IGC_FLAG_TSN_QBV_ENABLED)
@@ -83,13 +89,22 @@  static int igc_tsn_enable_offload(struct igc_adapter *adapter)
 	wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN);
 	wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN);
 
-	tqavctrl = rd32(IGC_TQAVCTRL);
 	rxpbs = rd32(IGC_RXPBS) & ~IGC_RXPBSIZE_SIZE_MASK;
 	rxpbs |= IGC_RXPBSIZE_TSN;
 
 	wr32(IGC_RXPBS, rxpbs);
 
+	tqavctrl = rd32(IGC_TQAVCTRL) &
+		~(IGC_TQAVCTRL_MIN_FRAG_MASK | IGC_TQAVCTRL_PREEMPT_ENA);
 	tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV;
+
+	if (adapter->frame_preemption_active)
+		tqavctrl |= IGC_TQAVCTRL_PREEMPT_ENA;
+
+	frag_size_mult = ethtool_frag_size_to_mult(adapter->add_frag_size);
+
+	tqavctrl |= frag_size_mult << IGC_TQAVCTRL_MIN_FRAG_SHIFT;
+
 	wr32(IGC_TQAVCTRL, tqavctrl);
 
 	wr32(IGC_QBVCYCLET_S, cycle);
@@ -115,6 +130,9 @@  static int igc_tsn_enable_offload(struct igc_adapter *adapter)
 		if (ring->launchtime_enable)
 			txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
 
+		if (ring->preemptible)
+			txqctl |= IGC_TXQCTL_PREEMPTABLE;
+
 		wr32(IGC_TXQCTL(i), txqctl);
 	}
 
@@ -142,7 +160,8 @@  static int igc_tsn_enable_offload(struct igc_adapter *adapter)
 
 int igc_tsn_offload_apply(struct igc_adapter *adapter)
 {
-	bool is_any_enabled = adapter->base_time || is_any_launchtime(adapter);
+	bool is_any_enabled = adapter->base_time ||
+		is_any_launchtime(adapter) || adapter->frame_preemption_active;
 
 	if (!(adapter->flags & IGC_FLAG_TSN_QBV_ENABLED) && !is_any_enabled)
 		return 0;