[iwl-next,v4,02/12] ice: Add function to get and set TX queue context

Message ID 20231121025111.257597-3-yahui.cao@intel.com (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Series Add E800 live migration driver

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/codegen success Generated files up to date
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1127 this patch: 1127
netdev/cc_maintainers warning 2 maintainers not CCed: jesse.brandeburg@intel.com anthony.l.nguyen@intel.com
netdev/build_clang success Errors and warnings before: 1154 this patch: 1154
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1154 this patch: 1154
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 288 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Cao, Yahui Nov. 21, 2023, 2:51 a.m. UTC
Export the TX queue context get and set functions, which are consumed by
the live migration driver to save and load device state.

The TX queue context contains static fields, which do not change during TX
traffic, and dynamic fields, which may change during TX traffic.

Signed-off-by: Yahui Cao <yahui.cao@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_common.c   | 216 +++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_common.h   |   6 +
 .../net/ethernet/intel/ice/ice_hw_autogen.h   |  15 ++
 .../net/ethernet/intel/ice/ice_lan_tx_rx.h    |   3 +
 4 files changed, 239 insertions(+), 1 deletion(-)
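
As a usage note, here is a minimal sketch of how a consumer such as the live
migration driver might save and restore one queue's context with these
exports. ice_read_txq_ctx()/ice_write_txq_ctx() and struct ice_tlan_ctx are
from this patch; the wrapper functions themselves are hypothetical.

/* Illustrative only: save a Tx queue's context on the migration source
 * and program it back on the target.
 */
static int migration_save_txq(struct ice_hw *hw, u32 txq_index,
			      struct ice_tlan_ctx *saved)
{
	/* Reads static and dynamic fields (per ice_tlan_ctx_data_info) */
	return ice_read_txq_ctx(hw, saved, txq_index);
}

static int migration_load_txq(struct ice_hw *hw, u32 txq_index,
			      struct ice_tlan_ctx *saved)
{
	/* The write path uses CMD_WRITE_NO_DYN, so only the static
	 * configuration fields are programmed back.
	 */
	return ice_write_txq_ctx(hw, saved, txq_index);
}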

Comments

Brett Creeley Dec. 8, 2023, 10:14 p.m. UTC | #1
On 11/20/2023 6:51 PM, Yahui Cao wrote:
> Export TX queue context get and set function which is consumed by linux
> live migration driver to save and load device state.

Nit, but I don't think "linux" needs to be mentioned here.

> 
> TX queue context contains static fields which does not change during TX
> traffic and dynamic fields which may change during TX traffic.
> 
> Signed-off-by: Yahui Cao <yahui.cao@intel.com>
> ---
>   drivers/net/ethernet/intel/ice/ice_common.c   | 216 +++++++++++++++++-
>   drivers/net/ethernet/intel/ice/ice_common.h   |   6 +
>   .../net/ethernet/intel/ice/ice_hw_autogen.h   |  15 ++
>   .../net/ethernet/intel/ice/ice_lan_tx_rx.h    |   3 +
>   4 files changed, 239 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
> index d0a3bed00921..8577a5ef423e 100644
> --- a/drivers/net/ethernet/intel/ice/ice_common.c
> +++ b/drivers/net/ethernet/intel/ice/ice_common.c
> @@ -1645,7 +1645,10 @@ ice_read_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
>          return ice_get_ctx(ctx_buf, (u8 *)rlan_ctx, ice_rlan_ctx_info);
>   }
> 
> -/* LAN Tx Queue Context */
> +/* LAN Tx Queue Context used for set Tx config by ice_aqc_opc_add_txqs,
> + * Bit[0-175] is valid
> + */
> +
>   const struct ice_ctx_ele ice_tlan_ctx_info[] = {
>                                      /* Field                    Width   LSB */
>          ICE_CTX_STORE(ice_tlan_ctx, base,                       57,     0),
> @@ -1679,6 +1682,217 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = {
>          { 0 }
>   };
> 
> +/* LAN Tx Queue Context used for get Tx config from QTXCOMM_CNTX data,
> + * Bit[0-292] is valid, including internal queue state. Since internal
> + * queue state is dynamic field, its value will be cleared once queue
> + * is disabled
> + */
> +static const struct ice_ctx_ele ice_tlan_ctx_data_info[] = {
> +                                   /* Field                    Width   LSB */
> +       ICE_CTX_STORE(ice_tlan_ctx, base,                       57,     0),
> +       ICE_CTX_STORE(ice_tlan_ctx, port_num,                   3,      57),
> +       ICE_CTX_STORE(ice_tlan_ctx, cgd_num,                    5,      60),
> +       ICE_CTX_STORE(ice_tlan_ctx, pf_num,                     3,      65),
> +       ICE_CTX_STORE(ice_tlan_ctx, vmvf_num,                   10,     68),
> +       ICE_CTX_STORE(ice_tlan_ctx, vmvf_type,                  2,      78),
> +       ICE_CTX_STORE(ice_tlan_ctx, src_vsi,                    10,     80),
> +       ICE_CTX_STORE(ice_tlan_ctx, tsyn_ena,                   1,      90),
> +       ICE_CTX_STORE(ice_tlan_ctx, internal_usage_flag,        1,      91),
> +       ICE_CTX_STORE(ice_tlan_ctx, alt_vlan,                   1,      92),
> +       ICE_CTX_STORE(ice_tlan_ctx, cpuid,                      8,      93),
> +       ICE_CTX_STORE(ice_tlan_ctx, wb_mode,                    1,      101),
> +       ICE_CTX_STORE(ice_tlan_ctx, tphrd_desc,                 1,      102),
> +       ICE_CTX_STORE(ice_tlan_ctx, tphrd,                      1,      103),
> +       ICE_CTX_STORE(ice_tlan_ctx, tphwr_desc,                 1,      104),
> +       ICE_CTX_STORE(ice_tlan_ctx, cmpq_id,                    9,      105),
> +       ICE_CTX_STORE(ice_tlan_ctx, qnum_in_func,               14,     114),
> +       ICE_CTX_STORE(ice_tlan_ctx, itr_notification_mode,      1,      128),
> +       ICE_CTX_STORE(ice_tlan_ctx, adjust_prof_id,             6,      129),
> +       ICE_CTX_STORE(ice_tlan_ctx, qlen,                       13,     135),
> +       ICE_CTX_STORE(ice_tlan_ctx, quanta_prof_idx,            4,      148),
> +       ICE_CTX_STORE(ice_tlan_ctx, tso_ena,                    1,      152),
> +       ICE_CTX_STORE(ice_tlan_ctx, tso_qnum,                   11,     153),
> +       ICE_CTX_STORE(ice_tlan_ctx, legacy_int,                 1,      164),
> +       ICE_CTX_STORE(ice_tlan_ctx, drop_ena,                   1,      165),
> +       ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx,             2,      166),
> +       ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx,        3,      168),
> +       ICE_CTX_STORE(ice_tlan_ctx, tail,                       13,     184),
> +       { 0 }
> +};
> +
> +/**
> + * ice_copy_txq_ctx_from_hw - Copy txq context register from HW
> + * @hw: pointer to the hardware structure
> + * @ice_txq_ctx: pointer to the txq context
> + *
> + * Copy txq context from HW register space to dense structure
> + */
> +static int
> +ice_copy_txq_ctx_from_hw(struct ice_hw *hw, u8 *ice_txq_ctx)
> +{
> +       u8 i;
> +
> +       if (!ice_txq_ctx)
> +               return -EINVAL;
> +
> +       /* Copy each dword separately from HW */
> +       for (i = 0; i < ICE_TXQ_CTX_SIZE_DWORDS; i++) {
> +               u32 *ctx = (u32 *)(ice_txq_ctx + (i * sizeof(u32)));
> +
> +               *ctx = rd32(hw, GLCOMM_QTX_CNTX_DATA(i));
> +
> +               ice_debug(hw, ICE_DBG_QCTX, "qtxdata[%d]: %08X\n", i, *ctx);
> +       }
> +
> +       return 0;
> +}
> +
> +/**
> + * ice_copy_txq_ctx_to_hw - Copy txq context register into HW
> + * @hw: pointer to the hardware structure
> + * @ice_txq_ctx: pointer to the txq context
> + *
> + * Copy txq context from dense structure to HW register space
> + */
> +static int
> +ice_copy_txq_ctx_to_hw(struct ice_hw *hw, u8 *ice_txq_ctx)
> +{
> +       u8 i;
> +
> +       if (!ice_txq_ctx)
> +               return -EINVAL;
> +
> +       /* Copy each dword separately to HW */
> +       for (i = 0; i < ICE_TXQ_CTX_SIZE_DWORDS; i++) {
> +               u32 *ctx = (u32 *)(ice_txq_ctx + (i * sizeof(u32)));
> +
> +               wr32(hw, GLCOMM_QTX_CNTX_DATA(i), *ctx);
> +
> +               ice_debug(hw, ICE_DBG_QCTX, "qtxdata[%d]: %08X\n", i, *ctx);
> +       }
> +
> +       return 0;
> +}
> +
> +/* Configuration access to tx ring context(from PF) is done via indirect
> + * interface, GLCOMM_QTX_CNTX_CTL/DATA registers. However, there registers

s/there/these

> + * are shared by all the PFs with single PCI card. Hence multiplied PF may
> + * access there registers simultaneously, causing access conflicts. Then

s/there/these

> + * card-level grained locking is required to protect these registers from
> + * being competed by PF devices within the same card. However, there is no
> + * such kind of card-level locking supported. Introduce a coarse grained
> + * global lock which is shared by all the PF driver.

Not sure if this has any unexpected consequences, but the lock will also
be shared between PFs of separate cards on the same system.
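
If that cross-card sharing ever matters, a per-card lock table could scope
the serialization to one card. A completely untested sketch follows; the
xarray, the key choice, and the helper name are all made up here, and
teardown/freeing is omitted:

/* Hypothetical: one mutex per physical card, keyed by the PF's PCI
 * domain and bus (assuming all PFs of a card sit on the same bus).
 */
static DEFINE_XARRAY(ice_txq_ctx_locks);

static struct mutex *ice_get_txq_ctx_lock(struct pci_dev *pdev)
{
	unsigned long key = (pci_domain_nr(pdev->bus) << 8) | pdev->bus->number;
	struct mutex *lock, *old;

	lock = xa_load(&ice_txq_ctx_locks, key);
	if (lock)
		return lock;

	lock = kzalloc(sizeof(*lock), GFP_KERNEL);
	if (!lock)
		return NULL;
	mutex_init(lock);

	/* Resolve allocation races: the first store wins, the loser frees */
	old = xa_cmpxchg(&ice_txq_ctx_locks, key, NULL, lock, GFP_KERNEL);
	if (old) {
		kfree(lock);
		lock = xa_is_err(old) ? NULL : old;
	}
	return lock;
}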

> + *
> + * The overall flow is to acquire the lock, read/write TXQ context through
> + * GLCOMM_QTX_CNTX_CTL/DATA indirect interface and release the lock once
> + * access is completed. In this way, only one PF can have access to TXQ
> + * context safely.
> + */
> +static DEFINE_MUTEX(ice_global_txq_ctx_lock);
> +
> +/**
> + * ice_read_txq_ctx - Read txq context from HW
> + * @hw: pointer to the hardware structure
> + * @tlan_ctx: pointer to the txq context
> + * @txq_index: the index of the Tx queue
> + *
> + * Read txq context from HW register space and then convert it from dense
> + * structure to sparse
> + */
> +int
> +ice_read_txq_ctx(struct ice_hw *hw, struct ice_tlan_ctx *tlan_ctx,
> +                u32 txq_index)
> +{
> +       u8 ctx_buf[ICE_TXQ_CTX_SZ] = { 0 };
> +       int status;
> +       u32 txq_base;
> +       u32 cmd, reg;
> +
> +       if (!tlan_ctx)
> +               return -EINVAL;
> +
> +       if (txq_index > QTX_COMM_HEAD_MAX_INDEX)
> +               return -EINVAL;
> +
> +       /* Get TXQ base within card space */
> +       txq_base = rd32(hw, PFLAN_TX_QALLOC(hw->pf_id));
> +       txq_base = (txq_base & PFLAN_TX_QALLOC_FIRSTQ_M) >>
> +                  PFLAN_TX_QALLOC_FIRSTQ_S;
> +
> +       cmd = (GLCOMM_QTX_CNTX_CTL_CMD_READ
> +               << GLCOMM_QTX_CNTX_CTL_CMD_S) & GLCOMM_QTX_CNTX_CTL_CMD_M;
> +       reg = cmd | GLCOMM_QTX_CNTX_CTL_CMD_EXEC_M |
> +             (((txq_base + txq_index) << GLCOMM_QTX_CNTX_CTL_QUEUE_ID_S) &
> +              GLCOMM_QTX_CNTX_CTL_QUEUE_ID_M);
> +
> +       mutex_lock(&ice_global_txq_ctx_lock);
> +
> +       wr32(hw, GLCOMM_QTX_CNTX_CTL, reg);
> +       ice_flush(hw);
> +
> +       status = ice_copy_txq_ctx_from_hw(hw, ctx_buf);
> +       if (status) {
> +               mutex_unlock(&ice_global_txq_ctx_lock);
> +               return status;
> +       }
> +
> +       mutex_unlock(&ice_global_txq_ctx_lock);
> +
> +       return ice_get_ctx(ctx_buf, (u8 *)tlan_ctx, ice_tlan_ctx_data_info);
> +}
> +
> +/**
> + * ice_write_txq_ctx - Write txq context to HW
> + * @hw: pointer to the hardware structure
> + * @tlan_ctx: pointer to the txq context
> + * @txq_index: the index of the Tx queue
> + *
> + * Convert txq context from sparse to dense structure and then write
> + * it to HW register space
> + */
> +int
> +ice_write_txq_ctx(struct ice_hw *hw, struct ice_tlan_ctx *tlan_ctx,
> +                 u32 txq_index)
> +{
> +       u8 ctx_buf[ICE_TXQ_CTX_SZ] = { 0 };
> +       int status;
> +       u32 txq_base;
> +       u32 cmd, reg;
> +
> +       if (!tlan_ctx)
> +               return -EINVAL;
> +
> +       if (txq_index > QTX_COMM_HEAD_MAX_INDEX)
> +               return -EINVAL;
> +
> +       ice_set_ctx(hw, (u8 *)tlan_ctx, ctx_buf, ice_tlan_ctx_info);
> +
> +       /* Get TXQ base within card space */
> +       txq_base = rd32(hw, PFLAN_TX_QALLOC(hw->pf_id));
> +       txq_base = (txq_base & PFLAN_TX_QALLOC_FIRSTQ_M) >>
> +                  PFLAN_TX_QALLOC_FIRSTQ_S;
> +
> +       cmd = (GLCOMM_QTX_CNTX_CTL_CMD_WRITE_NO_DYN
> +               << GLCOMM_QTX_CNTX_CTL_CMD_S) & GLCOMM_QTX_CNTX_CTL_CMD_M;
> +       reg = cmd | GLCOMM_QTX_CNTX_CTL_CMD_EXEC_M |
> +             (((txq_base + txq_index) << GLCOMM_QTX_CNTX_CTL_QUEUE_ID_S) &
> +              GLCOMM_QTX_CNTX_CTL_QUEUE_ID_M);
> +
> +       mutex_lock(&ice_global_txq_ctx_lock);
> +
> +       status = ice_copy_txq_ctx_to_hw(hw, ctx_buf);
> +       if (status) {
> +               mutex_unlock(&ice_global_txq_ctx_lock);
> +               return status;
> +       }
> +
> +       wr32(hw, GLCOMM_QTX_CNTX_CTL, reg);
> +       ice_flush(hw);
> +
> +       mutex_unlock(&ice_global_txq_ctx_lock);
> +
> +       return 0;
> +}
>   /* Sideband Queue command wrappers */
> 
>   /**
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
> index df9c7f30592a..40fbb9088475 100644
> --- a/drivers/net/ethernet/intel/ice/ice_common.h
> +++ b/drivers/net/ethernet/intel/ice/ice_common.h
> @@ -58,6 +58,12 @@ ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
>   int
>   ice_read_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
>                   u32 rxq_index);
> +int
> +ice_read_txq_ctx(struct ice_hw *hw, struct ice_tlan_ctx *tlan_ctx,
> +                u32 txq_index);
> +int
> +ice_write_txq_ctx(struct ice_hw *hw, struct ice_tlan_ctx *tlan_ctx,
> +                 u32 txq_index);
> 
>   int
>   ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params);
> diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
> index 86936b758ade..7410da715ad4 100644
> --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
> +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
> @@ -8,6 +8,7 @@
> 
>   #define QTX_COMM_DBELL(_DBQM)                  (0x002C0000 + ((_DBQM) * 4))
>   #define QTX_COMM_HEAD(_DBQM)                   (0x000E0000 + ((_DBQM) * 4))
> +#define QTX_COMM_HEAD_MAX_INDEX                        16383
>   #define QTX_COMM_HEAD_HEAD_S                   0
>   #define QTX_COMM_HEAD_HEAD_M                   ICE_M(0x1FFF, 0)
>   #define PF_FW_ARQBAH                           0x00080180
> @@ -258,6 +259,9 @@
>   #define VPINT_ALLOC_PCI_VALID_M                        BIT(31)
>   #define VPINT_MBX_CTL(_VSI)                    (0x0016A000 + ((_VSI) * 4))
>   #define VPINT_MBX_CTL_CAUSE_ENA_M              BIT(30)
> +#define PFLAN_TX_QALLOC(_PF)                   (0x001D2580 + ((_PF) * 4))
> +#define PFLAN_TX_QALLOC_FIRSTQ_S               0
> +#define PFLAN_TX_QALLOC_FIRSTQ_M               ICE_M(0x3FFF, 0)
>   #define GLLAN_RCTL_0                           0x002941F8
>   #define QRX_CONTEXT(_i, _QRX)                  (0x00280000 + ((_i) * 8192 + (_QRX) * 4))
>   #define QRX_CTRL(_QRX)                         (0x00120000 + ((_QRX) * 4))
> @@ -362,6 +366,17 @@
>   #define GLNVM_ULD_POR_DONE_1_M                 BIT(8)
>   #define GLNVM_ULD_PCIER_DONE_2_M               BIT(9)
>   #define GLNVM_ULD_PE_DONE_M                    BIT(10)
> +#define GLCOMM_QTX_CNTX_CTL                    0x002D2DC8
> +#define GLCOMM_QTX_CNTX_CTL_QUEUE_ID_S         0
> +#define GLCOMM_QTX_CNTX_CTL_QUEUE_ID_M         ICE_M(0x3FFF, 0)
> +#define GLCOMM_QTX_CNTX_CTL_CMD_S              16
> +#define GLCOMM_QTX_CNTX_CTL_CMD_M              ICE_M(0x7, 16)
> +#define GLCOMM_QTX_CNTX_CTL_CMD_READ           0
> +#define GLCOMM_QTX_CNTX_CTL_CMD_WRITE          1
> +#define GLCOMM_QTX_CNTX_CTL_CMD_RESET          3
> +#define GLCOMM_QTX_CNTX_CTL_CMD_WRITE_NO_DYN   4
> +#define GLCOMM_QTX_CNTX_CTL_CMD_EXEC_M         BIT(19)
> +#define GLCOMM_QTX_CNTX_DATA(_i)               (0x002D2D40 + ((_i) * 4))
>   #define GLPCI_CNF2                             0x000BE004
>   #define GLPCI_CNF2_CACHELINE_SIZE_M            BIT(1)
>   #define PF_FUNC_RID                            0x0009E880
> diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
> index 89f986a75cc8..79e07c863ae0 100644
> --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
> +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
> @@ -431,6 +431,8 @@ enum ice_rx_flex_desc_status_error_1_bits {
> 
>   #define ICE_RXQ_CTX_SIZE_DWORDS                8
>   #define ICE_RXQ_CTX_SZ                 (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))
> +#define ICE_TXQ_CTX_SIZE_DWORDS                10
> +#define ICE_TXQ_CTX_SZ                 (ICE_TXQ_CTX_SIZE_DWORDS * sizeof(u32))
>   #define ICE_TX_CMPLTNQ_CTX_SIZE_DWORDS 22
>   #define ICE_TX_DRBELL_Q_CTX_SIZE_DWORDS        5
>   #define GLTCLAN_CQ_CNTX(i, CQ)         (GLTCLAN_CQ_CNTX0(CQ) + ((i) * 0x0800))
> @@ -649,6 +651,7 @@ struct ice_tlan_ctx {
>          u8 cache_prof_idx;
>          u8 pkt_shaper_prof_idx;
>          u8 int_q_state; /* width not needed - internal - DO NOT WRITE!!! */
> +       u16 tail;
>   };
> 
>   /* The ice_ptype_lkup table is used to convert from the 10-bit ptype in the
> --
> 2.34.1
>
Patch

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index d0a3bed00921..8577a5ef423e 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1645,7 +1645,10 @@  ice_read_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
 	return ice_get_ctx(ctx_buf, (u8 *)rlan_ctx, ice_rlan_ctx_info);
 }
 
-/* LAN Tx Queue Context */
+/* LAN Tx Queue Context used to set the Tx config via ice_aqc_opc_add_txqs;
+ * bits 0-175 are valid
+ */
+
 const struct ice_ctx_ele ice_tlan_ctx_info[] = {
 				    /* Field			Width	LSB */
 	ICE_CTX_STORE(ice_tlan_ctx, base,			57,	0),
@@ -1679,6 +1682,217 @@  const struct ice_ctx_ele ice_tlan_ctx_info[] = {
 	{ 0 }
 };
 
+/* LAN Tx Queue Context used to get the Tx config from QTXCOMM_CNTX data;
+ * bits 0-292 are valid, including the internal queue state. Since the
+ * internal queue state is a dynamic field, its value is cleared once
+ * the queue is disabled
+ */
+static const struct ice_ctx_ele ice_tlan_ctx_data_info[] = {
+				    /* Field			Width	LSB */
+	ICE_CTX_STORE(ice_tlan_ctx, base,			57,	0),
+	ICE_CTX_STORE(ice_tlan_ctx, port_num,			3,	57),
+	ICE_CTX_STORE(ice_tlan_ctx, cgd_num,			5,	60),
+	ICE_CTX_STORE(ice_tlan_ctx, pf_num,			3,	65),
+	ICE_CTX_STORE(ice_tlan_ctx, vmvf_num,			10,	68),
+	ICE_CTX_STORE(ice_tlan_ctx, vmvf_type,			2,	78),
+	ICE_CTX_STORE(ice_tlan_ctx, src_vsi,			10,	80),
+	ICE_CTX_STORE(ice_tlan_ctx, tsyn_ena,			1,	90),
+	ICE_CTX_STORE(ice_tlan_ctx, internal_usage_flag,	1,	91),
+	ICE_CTX_STORE(ice_tlan_ctx, alt_vlan,			1,	92),
+	ICE_CTX_STORE(ice_tlan_ctx, cpuid,			8,	93),
+	ICE_CTX_STORE(ice_tlan_ctx, wb_mode,			1,	101),
+	ICE_CTX_STORE(ice_tlan_ctx, tphrd_desc,			1,	102),
+	ICE_CTX_STORE(ice_tlan_ctx, tphrd,			1,	103),
+	ICE_CTX_STORE(ice_tlan_ctx, tphwr_desc,			1,	104),
+	ICE_CTX_STORE(ice_tlan_ctx, cmpq_id,			9,	105),
+	ICE_CTX_STORE(ice_tlan_ctx, qnum_in_func,		14,	114),
+	ICE_CTX_STORE(ice_tlan_ctx, itr_notification_mode,	1,	128),
+	ICE_CTX_STORE(ice_tlan_ctx, adjust_prof_id,		6,	129),
+	ICE_CTX_STORE(ice_tlan_ctx, qlen,			13,	135),
+	ICE_CTX_STORE(ice_tlan_ctx, quanta_prof_idx,		4,	148),
+	ICE_CTX_STORE(ice_tlan_ctx, tso_ena,			1,	152),
+	ICE_CTX_STORE(ice_tlan_ctx, tso_qnum,			11,	153),
+	ICE_CTX_STORE(ice_tlan_ctx, legacy_int,			1,	164),
+	ICE_CTX_STORE(ice_tlan_ctx, drop_ena,			1,	165),
+	ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx,		2,	166),
+	ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx,	3,	168),
+	ICE_CTX_STORE(ice_tlan_ctx, tail,			13,	184),
+	{ 0 }
+};
+
+/**
+ * ice_copy_txq_ctx_from_hw - Copy txq context register from HW
+ * @hw: pointer to the hardware structure
+ * @ice_txq_ctx: pointer to the txq context
+ *
+ * Copy txq context from HW register space to dense structure
+ */
+static int
+ice_copy_txq_ctx_from_hw(struct ice_hw *hw, u8 *ice_txq_ctx)
+{
+	u8 i;
+
+	if (!ice_txq_ctx)
+		return -EINVAL;
+
+	/* Copy each dword separately from HW */
+	for (i = 0; i < ICE_TXQ_CTX_SIZE_DWORDS; i++) {
+		u32 *ctx = (u32 *)(ice_txq_ctx + (i * sizeof(u32)));
+
+		*ctx = rd32(hw, GLCOMM_QTX_CNTX_DATA(i));
+
+		ice_debug(hw, ICE_DBG_QCTX, "qtxdata[%d]: %08X\n", i, *ctx);
+	}
+
+	return 0;
+}
+
+/**
+ * ice_copy_txq_ctx_to_hw - Copy txq context register into HW
+ * @hw: pointer to the hardware structure
+ * @ice_txq_ctx: pointer to the txq context
+ *
+ * Copy txq context from dense structure to HW register space
+ */
+static int
+ice_copy_txq_ctx_to_hw(struct ice_hw *hw, u8 *ice_txq_ctx)
+{
+	u8 i;
+
+	if (!ice_txq_ctx)
+		return -EINVAL;
+
+	/* Copy each dword separately to HW */
+	for (i = 0; i < ICE_TXQ_CTX_SIZE_DWORDS; i++) {
+		u32 *ctx = (u32 *)(ice_txq_ctx + (i * sizeof(u32)));
+
+		wr32(hw, GLCOMM_QTX_CNTX_DATA(i), *ctx);
+
+		ice_debug(hw, ICE_DBG_QCTX, "qtxdata[%d]: %08X\n", i, *ctx);
+	}
+
+	return 0;
+}
+
+/* Configuration access to the Tx ring context (from the PF) is done via an
+ * indirect interface, the GLCOMM_QTX_CNTX_CTL/DATA registers. However, these
+ * registers are shared by all the PFs on a single PCI card, so multiple PFs
+ * may access these registers simultaneously, causing access conflicts.
+ * Card-level locking would be required to protect these registers from
+ * contention between PF devices within the same card, but no such
+ * card-level locking is supported. Introduce a coarse-grained global lock
+ * which is shared by all the PF drivers.
+ *
+ * The overall flow is to acquire the lock, read/write TXQ context through
+ * GLCOMM_QTX_CNTX_CTL/DATA indirect interface and release the lock once
+ * access is completed. In this way, only one PF can have access to TXQ
+ * context safely.
+ */
+static DEFINE_MUTEX(ice_global_txq_ctx_lock);
+
+/**
+ * ice_read_txq_ctx - Read txq context from HW
+ * @hw: pointer to the hardware structure
+ * @tlan_ctx: pointer to the txq context
+ * @txq_index: the index of the Tx queue
+ *
+ * Read txq context from HW register space and then convert it from dense
+ * structure to sparse
+ */
+int
+ice_read_txq_ctx(struct ice_hw *hw, struct ice_tlan_ctx *tlan_ctx,
+		 u32 txq_index)
+{
+	u8 ctx_buf[ICE_TXQ_CTX_SZ] = { 0 };
+	int status;
+	u32 txq_base;
+	u32 cmd, reg;
+
+	if (!tlan_ctx)
+		return -EINVAL;
+
+	if (txq_index > QTX_COMM_HEAD_MAX_INDEX)
+		return -EINVAL;
+
+	/* Get TXQ base within card space */
+	txq_base = rd32(hw, PFLAN_TX_QALLOC(hw->pf_id));
+	txq_base = (txq_base & PFLAN_TX_QALLOC_FIRSTQ_M) >>
+		   PFLAN_TX_QALLOC_FIRSTQ_S;
+
+	cmd = (GLCOMM_QTX_CNTX_CTL_CMD_READ
+		<< GLCOMM_QTX_CNTX_CTL_CMD_S) & GLCOMM_QTX_CNTX_CTL_CMD_M;
+	reg = cmd | GLCOMM_QTX_CNTX_CTL_CMD_EXEC_M |
+	      (((txq_base + txq_index) << GLCOMM_QTX_CNTX_CTL_QUEUE_ID_S) &
+	       GLCOMM_QTX_CNTX_CTL_QUEUE_ID_M);
+
+	mutex_lock(&ice_global_txq_ctx_lock);
+
+	wr32(hw, GLCOMM_QTX_CNTX_CTL, reg);
+	ice_flush(hw);
+
+	status = ice_copy_txq_ctx_from_hw(hw, ctx_buf);
+	if (status) {
+		mutex_unlock(&ice_global_txq_ctx_lock);
+		return status;
+	}
+
+	mutex_unlock(&ice_global_txq_ctx_lock);
+
+	return ice_get_ctx(ctx_buf, (u8 *)tlan_ctx, ice_tlan_ctx_data_info);
+}
+
+/**
+ * ice_write_txq_ctx - Write txq context to HW
+ * @hw: pointer to the hardware structure
+ * @tlan_ctx: pointer to the txq context
+ * @txq_index: the index of the Tx queue
+ *
+ * Convert txq context from sparse to dense structure and then write
+ * it to HW register space
+ */
+int
+ice_write_txq_ctx(struct ice_hw *hw, struct ice_tlan_ctx *tlan_ctx,
+		  u32 txq_index)
+{
+	u8 ctx_buf[ICE_TXQ_CTX_SZ] = { 0 };
+	int status;
+	u32 txq_base;
+	u32 cmd, reg;
+
+	if (!tlan_ctx)
+		return -EINVAL;
+
+	if (txq_index > QTX_COMM_HEAD_MAX_INDEX)
+		return -EINVAL;
+
+	ice_set_ctx(hw, (u8 *)tlan_ctx, ctx_buf, ice_tlan_ctx_info);
+
+	/* Get TXQ base within card space */
+	txq_base = rd32(hw, PFLAN_TX_QALLOC(hw->pf_id));
+	txq_base = (txq_base & PFLAN_TX_QALLOC_FIRSTQ_M) >>
+		   PFLAN_TX_QALLOC_FIRSTQ_S;
+
+	cmd = (GLCOMM_QTX_CNTX_CTL_CMD_WRITE_NO_DYN
+		<< GLCOMM_QTX_CNTX_CTL_CMD_S) & GLCOMM_QTX_CNTX_CTL_CMD_M;
+	reg = cmd | GLCOMM_QTX_CNTX_CTL_CMD_EXEC_M |
+	      (((txq_base + txq_index) << GLCOMM_QTX_CNTX_CTL_QUEUE_ID_S) &
+	       GLCOMM_QTX_CNTX_CTL_QUEUE_ID_M);
+
+	mutex_lock(&ice_global_txq_ctx_lock);
+
+	status = ice_copy_txq_ctx_to_hw(hw, ctx_buf);
+	if (status) {
+		mutex_unlock(&ice_global_txq_ctx_lock);
+		return status;
+	}
+
+	wr32(hw, GLCOMM_QTX_CNTX_CTL, reg);
+	ice_flush(hw);
+
+	mutex_unlock(&ice_global_txq_ctx_lock);
+
+	return 0;
+}
 /* Sideband Queue command wrappers */
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index df9c7f30592a..40fbb9088475 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -58,6 +58,12 @@  ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
 int
 ice_read_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
 		 u32 rxq_index);
+int
+ice_read_txq_ctx(struct ice_hw *hw, struct ice_tlan_ctx *tlan_ctx,
+		 u32 txq_index);
+int
+ice_write_txq_ctx(struct ice_hw *hw, struct ice_tlan_ctx *tlan_ctx,
+		  u32 txq_index);
 
 int
 ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params);
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 86936b758ade..7410da715ad4 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -8,6 +8,7 @@ 
 
 #define QTX_COMM_DBELL(_DBQM)			(0x002C0000 + ((_DBQM) * 4))
 #define QTX_COMM_HEAD(_DBQM)			(0x000E0000 + ((_DBQM) * 4))
+#define QTX_COMM_HEAD_MAX_INDEX			16383
 #define QTX_COMM_HEAD_HEAD_S			0
 #define QTX_COMM_HEAD_HEAD_M			ICE_M(0x1FFF, 0)
 #define PF_FW_ARQBAH				0x00080180
@@ -258,6 +259,9 @@ 
 #define VPINT_ALLOC_PCI_VALID_M			BIT(31)
 #define VPINT_MBX_CTL(_VSI)			(0x0016A000 + ((_VSI) * 4))
 #define VPINT_MBX_CTL_CAUSE_ENA_M		BIT(30)
+#define PFLAN_TX_QALLOC(_PF)			(0x001D2580 + ((_PF) * 4))
+#define PFLAN_TX_QALLOC_FIRSTQ_S		0
+#define PFLAN_TX_QALLOC_FIRSTQ_M		ICE_M(0x3FFF, 0)
 #define GLLAN_RCTL_0				0x002941F8
 #define QRX_CONTEXT(_i, _QRX)			(0x00280000 + ((_i) * 8192 + (_QRX) * 4))
 #define QRX_CTRL(_QRX)				(0x00120000 + ((_QRX) * 4))
@@ -362,6 +366,17 @@ 
 #define GLNVM_ULD_POR_DONE_1_M			BIT(8)
 #define GLNVM_ULD_PCIER_DONE_2_M		BIT(9)
 #define GLNVM_ULD_PE_DONE_M			BIT(10)
+#define GLCOMM_QTX_CNTX_CTL			0x002D2DC8
+#define GLCOMM_QTX_CNTX_CTL_QUEUE_ID_S		0
+#define GLCOMM_QTX_CNTX_CTL_QUEUE_ID_M		ICE_M(0x3FFF, 0)
+#define GLCOMM_QTX_CNTX_CTL_CMD_S		16
+#define GLCOMM_QTX_CNTX_CTL_CMD_M		ICE_M(0x7, 16)
+#define GLCOMM_QTX_CNTX_CTL_CMD_READ		0
+#define GLCOMM_QTX_CNTX_CTL_CMD_WRITE		1
+#define GLCOMM_QTX_CNTX_CTL_CMD_RESET		3
+#define GLCOMM_QTX_CNTX_CTL_CMD_WRITE_NO_DYN	4
+#define GLCOMM_QTX_CNTX_CTL_CMD_EXEC_M		BIT(19)
+#define GLCOMM_QTX_CNTX_DATA(_i)		(0x002D2D40 + ((_i) * 4))
 #define GLPCI_CNF2				0x000BE004
 #define GLPCI_CNF2_CACHELINE_SIZE_M		BIT(1)
 #define PF_FUNC_RID				0x0009E880
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 89f986a75cc8..79e07c863ae0 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -431,6 +431,8 @@  enum ice_rx_flex_desc_status_error_1_bits {
 
 #define ICE_RXQ_CTX_SIZE_DWORDS		8
 #define ICE_RXQ_CTX_SZ			(ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))
+#define ICE_TXQ_CTX_SIZE_DWORDS		10
+#define ICE_TXQ_CTX_SZ			(ICE_TXQ_CTX_SIZE_DWORDS * sizeof(u32))
 #define ICE_TX_CMPLTNQ_CTX_SIZE_DWORDS	22
 #define ICE_TX_DRBELL_Q_CTX_SIZE_DWORDS	5
 #define GLTCLAN_CQ_CNTX(i, CQ)		(GLTCLAN_CQ_CNTX0(CQ) + ((i) * 0x0800))
@@ -649,6 +651,7 @@  struct ice_tlan_ctx {
 	u8 cache_prof_idx;
 	u8 pkt_shaper_prof_idx;
 	u8 int_q_state;	/* width not needed - internal - DO NOT WRITE!!! */
+	u16 tail;
 };
 
 /* The ice_ptype_lkup table is used to convert from the 10-bit ptype in the
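
As a closing note on the context tables: each Width/LSB pair above describes
where a sparse struct ice_tlan_ctx field sits inside the dense 10-dword
buffer that ice_copy_txq_ctx_from_hw() fills. The following standalone sketch
shows the extraction that ice_get_ctx() effectively performs for one field;
it is simplified (bit-at-a-time), whereas the real helper in ice_common.c
works on byte/word/dword/qword granularity:

#include <linux/bits.h>
#include <linux/types.h>

/* Pull 'width' bits starting at absolute bit 'lsb' out of the dense
 * little-endian context buffer.
 */
static u64 ctx_extract(const u8 *ctx_buf, u16 lsb, u16 width)
{
	u64 val = 0;
	u16 bit;

	for (bit = 0; bit < width; bit++) {
		u16 src = lsb + bit;

		if (ctx_buf[src / 8] & BIT(src % 8))
			val |= BIT_ULL(bit);
	}
	return val;
}

/* Example: the tail field is 13 bits at LSB 184 in the table above, so
 * tail == ctx_extract(ctx_buf, 184, 13).
 */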