Message ID | a5c1b2110e567f499e17a4a67f1cc7c2036566c4.1742324341.git.pav@iki.fi (mailing list archive) |
---|---|
State | Accepted |
Commit | 2a1b83b8a4b26df4a105b02fc713283ea5ea9a59 |
Headers | show |
Series | net: Bluetooth: add TX timestamping for ISO/L2CAP/SCO | expand |
Context | Check | Description |
---|---|---|
tedd_an/SubjectPrefix | success | Gitlint PASS |
On Wed, Mar 19, 2025 at 3:10 AM Pauli Virtanen <pav@iki.fi> wrote: > > Support enabling TX timestamping for some skbs, and track them until > packet completion. Generate software SCM_TSTAMP_COMPLETION when getting > completion report from hardware. > > Generate software SCM_TSTAMP_SND before sending to driver. Sending from > driver requires changes in the driver API, and drivers mostly are going > to send the skb immediately. > > Make the default situation with no COMPLETION TX timestamping more > efficient by only counting packets in the queue when there is nothing to > track. When there is something to track, we need to make clones, since > the driver may modify sent skbs. > > The tx_q queue length is bounded by the hdev flow control, which will > not send new packets before it has got completion reports for old ones. > > Signed-off-by: Pauli Virtanen <pav@iki.fi> > --- > > Notes: > v5: > - Add hci_sockm_init() > - Back to decoupled COMPLETION & SND, like in v3 > - Handle SCO flow controlled case > > include/net/bluetooth/hci_core.h | 20 +++++ > net/bluetooth/hci_conn.c | 122 +++++++++++++++++++++++++++++++ > net/bluetooth/hci_core.c | 15 +++- > net/bluetooth/hci_event.c | 4 + > 4 files changed, 157 insertions(+), 4 deletions(-) > > diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h > index f78e4298e39a..5115da34f881 100644 > --- a/include/net/bluetooth/hci_core.h > +++ b/include/net/bluetooth/hci_core.h > @@ -261,6 +261,12 @@ struct adv_info { > struct delayed_work rpa_expired_cb; > }; > > +struct tx_queue { > + struct sk_buff_head queue; > + unsigned int extra; > + unsigned int tracked; > +}; > + > #define HCI_MAX_ADV_INSTANCES 5 > #define HCI_DEFAULT_ADV_DURATION 2 > > @@ -733,6 +739,8 @@ struct hci_conn { > struct sk_buff_head data_q; > struct list_head chan_list; > > + struct tx_queue tx_q; > + > struct delayed_work disc_work; > struct delayed_work auto_accept_work; > struct delayed_work idle_work; > @@ -1572,6 +1580,18 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); > void hci_conn_failed(struct hci_conn *conn, u8 status); > u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle); > > +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb); > +void hci_conn_tx_dequeue(struct hci_conn *conn); > +void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, > + const struct sockcm_cookie *sockc); > + > +static inline void hci_sockcm_init(struct sockcm_cookie *sockc, struct sock *sk) > +{ > + *sockc = (struct sockcm_cookie) { > + .tsflags = READ_ONCE(sk->sk_tsflags), > + }; > +} > + > /* > * hci_conn_get() and hci_conn_put() are used to control the life-time of an > * "hci_conn" object. They do not guarantee that the hci_conn object is running, > diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c > index d097e308a755..95972fd4c784 100644 > --- a/net/bluetooth/hci_conn.c > +++ b/net/bluetooth/hci_conn.c > @@ -27,6 +27,7 @@ > > #include <linux/export.h> > #include <linux/debugfs.h> > +#include <linux/errqueue.h> > > #include <net/bluetooth/bluetooth.h> > #include <net/bluetooth/hci_core.h> > @@ -1002,6 +1003,7 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t > } > > skb_queue_head_init(&conn->data_q); > + skb_queue_head_init(&conn->tx_q.queue); > > INIT_LIST_HEAD(&conn->chan_list); > INIT_LIST_HEAD(&conn->link_list); > @@ -1155,6 +1157,7 @@ void hci_conn_del(struct hci_conn *conn) > } > > skb_queue_purge(&conn->data_q); > + skb_queue_purge(&conn->tx_q.queue); > > /* Remove the connection from the list and cleanup its remaining > * state. This is a separate function since for some cases like > @@ -3064,3 +3067,122 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) > */ > return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); > } > + > +void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, > + const struct sockcm_cookie *sockc) > +{ > + struct sock *sk = skb ? skb->sk : NULL; > + > + /* This shall be called on a single skb of those generated by user > + * sendmsg(), and only when the sendmsg() does not return error to > + * user. This is required for keeping the tskey that increments here in > + * sync with possible sendmsg() counting by user. > + * > + * Stream sockets shall set key_offset to sendmsg() length in bytes > + * and call with the last fragment, others to 1 and first fragment. > + */ > + > + if (!skb || !sockc || !sk || !key_offset) > + return; > + > + sock_tx_timestamp(sk, sockc, &skb_shinfo(skb)->tx_flags); > + > + if (sockc->tsflags & SOF_TIMESTAMPING_OPT_ID && > + sockc->tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) { > + if (sockc->tsflags & SOCKCM_FLAG_TS_OPT_ID) { > + skb_shinfo(skb)->tskey = sockc->ts_opt_id; > + } else { > + int key = atomic_add_return(key_offset, &sk->sk_tskey); > + > + skb_shinfo(skb)->tskey = key - 1; > + } > + } > +} > + > +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) > +{ > + struct tx_queue *comp = &conn->tx_q; > + bool track = false; > + > + /* Emit SND now, ie. just before sending to driver */ > + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) > + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SND); It's a bit strange that SCM_TSTAMP_SND is under the control of SKBTX_SW_TSTAMP. Can we use the same flag for both lines here directly? I suppose I would use SKBTX_SW_TSTAMP then. > + > + /* COMPLETION tstamp is emitted for tracked skb later in Number of > + * Completed Packets event. Available only for flow controlled cases. > + * > + * TODO: SCO support without flowctl (needs to be done in drivers) > + */ > + switch (conn->type) { > + case ISO_LINK: > + case ACL_LINK: > + case LE_LINK: > + break; > + case SCO_LINK: > + case ESCO_LINK: > + if (!hci_dev_test_flag(conn->hdev, HCI_SCO_FLOWCTL)) > + return; > + break; > + default: > + return; > + } > + > + if (skb->sk && (skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP)) > + track = true; > + > + /* If nothing is tracked, just count extra skbs at the queue head */ > + if (!track && !comp->tracked) { > + comp->extra++; > + return; > + } > + > + if (track) { > + skb = skb_clone_sk(skb); > + if (!skb) > + goto count_only; > + > + comp->tracked++; > + } else { > + skb = skb_clone(skb, GFP_KERNEL); > + if (!skb) > + goto count_only; > + } > + > + skb_queue_tail(&comp->queue, skb); > + return; > + > +count_only: > + /* Stop tracking skbs, and only count. This will not emit timestamps for > + * the packets, but if we get here something is more seriously wrong. > + */ > + comp->tracked = 0; > + comp->extra += skb_queue_len(&comp->queue) + 1; > + skb_queue_purge(&comp->queue); > +} > + > +void hci_conn_tx_dequeue(struct hci_conn *conn) > +{ > + struct tx_queue *comp = &conn->tx_q; > + struct sk_buff *skb; > + > + /* If there are tracked skbs, the counted extra go before dequeuing real > + * skbs, to keep ordering. When nothing is tracked, the ordering doesn't > + * matter so dequeue real skbs first to get rid of them ASAP. > + */ > + if (comp->extra && (comp->tracked || skb_queue_empty(&comp->queue))) { > + comp->extra--; > + return; > + } > + > + skb = skb_dequeue(&comp->queue); > + if (!skb) > + return; > + > + if (skb->sk) { > + comp->tracked--; Need an explicit if statement here? > + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, > + SCM_TSTAMP_COMPLETION); This is the socket timestamping, and that's right. My minor question is: for the use of bpf timestamping (that should be easy as you've done in patch 1, I presume), I'm not sure if you're familiar with it. If not, I plan to implement it myself in a follow-up patch and then let you do some tests? Of course, I will provide the bpf test script :) Thanks, Jason > + } > + > + kfree_skb(skb); > +} > diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c > index 94d9147612da..5eb0600bbd03 100644 > --- a/net/bluetooth/hci_core.c > +++ b/net/bluetooth/hci_core.c > @@ -3029,6 +3029,13 @@ static int hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) > return 0; > } > > +static int hci_send_conn_frame(struct hci_dev *hdev, struct hci_conn *conn, > + struct sk_buff *skb) > +{ > + hci_conn_tx_queue(conn, skb); > + return hci_send_frame(hdev, skb); > +} > + > /* Send HCI command */ > int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, > const void *param) > @@ -3575,7 +3582,7 @@ static void hci_sched_sco(struct hci_dev *hdev, __u8 type) > while (*cnt && (conn = hci_low_sent(hdev, type, "e))) { > while (quote-- && (skb = skb_dequeue(&conn->data_q))) { > BT_DBG("skb %p len %d", skb, skb->len); > - hci_send_frame(hdev, skb); > + hci_send_conn_frame(hdev, conn, skb); > > conn->sent++; > if (conn->sent == ~0) > @@ -3618,7 +3625,7 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) > hci_conn_enter_active_mode(chan->conn, > bt_cb(skb)->force_active); > > - hci_send_frame(hdev, skb); > + hci_send_conn_frame(hdev, chan->conn, skb); > hdev->acl_last_tx = jiffies; > > hdev->acl_cnt--; > @@ -3674,7 +3681,7 @@ static void hci_sched_le(struct hci_dev *hdev) > > skb = skb_dequeue(&chan->data_q); > > - hci_send_frame(hdev, skb); > + hci_send_conn_frame(hdev, chan->conn, skb); > hdev->le_last_tx = jiffies; > > (*cnt)--; > @@ -3708,7 +3715,7 @@ static void hci_sched_iso(struct hci_dev *hdev) > while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, "e))) { > while (quote-- && (skb = skb_dequeue(&conn->data_q))) { > BT_DBG("skb %p len %d", skb, skb->len); > - hci_send_frame(hdev, skb); > + hci_send_conn_frame(hdev, conn, skb); > > conn->sent++; > if (conn->sent == ~0) > diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c > index 0df4a0e082c8..83990c975c1f 100644 > --- a/net/bluetooth/hci_event.c > +++ b/net/bluetooth/hci_event.c > @@ -4415,6 +4415,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, > struct hci_comp_pkts_info *info = &ev->handles[i]; > struct hci_conn *conn; > __u16 handle, count; > + unsigned int i; > > handle = __le16_to_cpu(info->handle); > count = __le16_to_cpu(info->count); > @@ -4425,6 +4426,9 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, > > conn->sent -= count; > > + for (i = 0; i < count; ++i) > + hci_conn_tx_dequeue(conn); > + > switch (conn->type) { > case ACL_LINK: > hdev->acl_cnt += count; > -- > 2.48.1 > >
Jason Xing wrote: > On Wed, Mar 19, 2025 at 3:10 AM Pauli Virtanen <pav@iki.fi> wrote: > > > > Support enabling TX timestamping for some skbs, and track them until > > packet completion. Generate software SCM_TSTAMP_COMPLETION when getting > > completion report from hardware. > > > > Generate software SCM_TSTAMP_SND before sending to driver. Sending from > > driver requires changes in the driver API, and drivers mostly are going > > to send the skb immediately. > > > > Make the default situation with no COMPLETION TX timestamping more > > efficient by only counting packets in the queue when there is nothing to > > track. When there is something to track, we need to make clones, since > > the driver may modify sent skbs. Why count packets at all? And if useful separate from completions, should that be a separate patch? > > +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) > > +{ > > + struct tx_queue *comp = &conn->tx_q; > > + bool track = false; > > + > > + /* Emit SND now, ie. just before sending to driver */ > > + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) > > + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SND); > > It's a bit strange that SCM_TSTAMP_SND is under the control of > SKBTX_SW_TSTAMP. Can we use the same flag for both lines here > directly? I suppose I would use SKBTX_SW_TSTAMP then. This is the established behavior. > > > + > > + /* COMPLETION tstamp is emitted for tracked skb later in Number of > > + * Completed Packets event. Available only for flow controlled cases. > > + * > > + * TODO: SCO support without flowctl (needs to be done in drivers) > > + */ > > + switch (conn->type) { > > + case ISO_LINK: > > + case ACL_LINK: > > + case LE_LINK: > > + break; > > + case SCO_LINK: > > + case ESCO_LINK: > > + if (!hci_dev_test_flag(conn->hdev, HCI_SCO_FLOWCTL)) > > + return; > > + break; > > + default: > > + return; > > + } > > + > > + if (skb->sk && (skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP)) > > + track = true; > > + > > + /* If nothing is tracked, just count extra skbs at the queue head */ > > + if (!track && !comp->tracked) { > > + comp->extra++; > > + return; > > + } > > + > > + if (track) { > > + skb = skb_clone_sk(skb); > > + if (!skb) > > + goto count_only; > > + > > + comp->tracked++; > > + } else { > > + skb = skb_clone(skb, GFP_KERNEL); > > + if (!skb) > > + goto count_only; > > + } What is the difference between track and comp->tracked
Hi, ke, 2025-03-19 kello 10:44 -0400, Willem de Bruijn kirjoitti: > Jason Xing wrote: > > On Wed, Mar 19, 2025 at 3:10 AM Pauli Virtanen <pav@iki.fi> wrote: > > > > > > Support enabling TX timestamping for some skbs, and track them until > > > packet completion. Generate software SCM_TSTAMP_COMPLETION when getting > > > completion report from hardware. > > > > > > Generate software SCM_TSTAMP_SND before sending to driver. Sending from > > > driver requires changes in the driver API, and drivers mostly are going > > > to send the skb immediately. > > > > > > Make the default situation with no COMPLETION TX timestamping more > > > efficient by only counting packets in the queue when there is nothing to > > > track. When there is something to track, we need to make clones, since > > > the driver may modify sent skbs. > > Why count packets at all? And if useful separate from completions, > should that be a separate patch? This paragraph was commenting on the implementation of struct tx_queue, and maybe how it works should be explicitly explained somewhere (code comment?). Here's some explanation of it: 1) We have to hang on (clones of) skbs until completion reports for them arrive, in order to emit COMPLETION timestamps. There's no existing queue that does this in net/bluetooth (drivers may just copy data & discard skbs, and they don't know about completion reports), so something new needs to be added. 2) It is only needed for emitting COMPLETION timestamps. So it's better to not do any extra work (clones etc.) when there are no such timestamps to be emitted. 3) The new queue should work correctly when timestamping is turned on or off, or only some packets are timestamped. It should also eventually return to a state where no extra work is done, when new skbs don't request COMPLETION timestamps. struct tx_queue implements such queue that only "tracks" some skbs. Logical structure: HEAD <no stored skb> } <no stored skb> } tx_queue::extra is the number of non-tracked ... } logical items at queue head <no stored skb> } <tracked skb> } tx_queue::queue contains mixture of <non-tracked skb> } tracked items (skb->sk != NULL) and <non-tracked skb> } non-tracked items (skb->sk == NULL). <tracked skb> } These are ordered after the "extra" items. TAIL tx_queue::tracked is the number of tracked skbs in tx_queue::queue. hci_conn_tx_queue() determines whether skb is tracked (= COMPLETION timestamp shall be emitted for it) and pushes a logical item to TAIL. hci_conn_tx_dequeue() pops a logical item from HEAD, and emits timestamp if it corresponds to a tracked skb. When tracked == 0, queue() can just increment tx_queue::extra, and dequeue() can remove any skb from tx_queue::queue, or if empty then decrement tx_queue::extra. This allows it to return to a state with empty tx_queue::queue when new skbs no longer request timestamps. When tracked != 0, the ordering of items in the queue needs to be respected strictly, so queue() always pushes real skb (tracked or not) to TAIL, and dequeue() has to decrement extra to zero, before it can pop skb from queue head. > > > +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) > > > +{ > > > + struct tx_queue *comp = &conn->tx_q; > > > + bool track = false; > > > + > > > + /* Emit SND now, ie. just before sending to driver */ > > > + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) > > > + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SND); > > > > It's a bit strange that SCM_TSTAMP_SND is under the control of > > SKBTX_SW_TSTAMP. Can we use the same flag for both lines here > > directly? I suppose I would use SKBTX_SW_TSTAMP then. > > This is the established behavior. > > > > > + > > > + /* COMPLETION tstamp is emitted for tracked skb later in Number of > > > + * Completed Packets event. Available only for flow controlled cases. > > > + * > > > + * TODO: SCO support without flowctl (needs to be done in drivers) > > > + */ > > > + switch (conn->type) { > > > + case ISO_LINK: > > > + case ACL_LINK: > > > + case LE_LINK: > > > + break; > > > + case SCO_LINK: > > > + case ESCO_LINK: > > > + if (!hci_dev_test_flag(conn->hdev, HCI_SCO_FLOWCTL)) > > > + return; > > > + break; > > > + default: > > > + return; > > > + } > > > + > > > + if (skb->sk && (skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP)) > > > + track = true; > > > + > > > + /* If nothing is tracked, just count extra skbs at the queue head */ > > > + if (!track && !comp->tracked) { > > > + comp->extra++; > > > + return; > > > + } > > > + > > > + if (track) { > > > + skb = skb_clone_sk(skb); > > > + if (!skb) > > > + goto count_only; > > > + > > > + comp->tracked++; > > > + } else { > > > + skb = skb_clone(skb, GFP_KERNEL); > > > + if (!skb) > > > + goto count_only; > > > + } > > What is the difference between track and comp->tracked I hope the answer above is clear.
Hi, ke, 2025-03-19 kello 08:39 +0800, Jason Xing kirjoitti: > On Wed, Mar 19, 2025 at 3:10 AM Pauli Virtanen <pav@iki.fi> wrote: > > > > Support enabling TX timestamping for some skbs, and track them until > > packet completion. Generate software SCM_TSTAMP_COMPLETION when getting > > completion report from hardware. > > > > Generate software SCM_TSTAMP_SND before sending to driver. Sending from > > driver requires changes in the driver API, and drivers mostly are going > > to send the skb immediately. > > > > Make the default situation with no COMPLETION TX timestamping more > > efficient by only counting packets in the queue when there is nothing to > > track. When there is something to track, we need to make clones, since > > the driver may modify sent skbs. > > > > The tx_q queue length is bounded by the hdev flow control, which will > > not send new packets before it has got completion reports for old ones. > > > > Signed-off-by: Pauli Virtanen <pav@iki.fi> > > --- > > > > Notes: > > v5: > > - Add hci_sockm_init() > > - Back to decoupled COMPLETION & SND, like in v3 > > - Handle SCO flow controlled case > > > > include/net/bluetooth/hci_core.h | 20 +++++ > > net/bluetooth/hci_conn.c | 122 +++++++++++++++++++++++++++++++ > > net/bluetooth/hci_core.c | 15 +++- > > net/bluetooth/hci_event.c | 4 + > > 4 files changed, 157 insertions(+), 4 deletions(-) > > > > diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h > > index f78e4298e39a..5115da34f881 100644 > > --- a/include/net/bluetooth/hci_core.h > > +++ b/include/net/bluetooth/hci_core.h > > @@ -261,6 +261,12 @@ struct adv_info { > > struct delayed_work rpa_expired_cb; > > }; > > > > +struct tx_queue { > > + struct sk_buff_head queue; > > + unsigned int extra; > > + unsigned int tracked; > > +}; > > + > > #define HCI_MAX_ADV_INSTANCES 5 > > #define HCI_DEFAULT_ADV_DURATION 2 > > > > @@ -733,6 +739,8 @@ struct hci_conn { > > struct sk_buff_head data_q; > > struct list_head chan_list; > > > > + struct tx_queue tx_q; > > + > > struct delayed_work disc_work; > > struct delayed_work auto_accept_work; > > struct delayed_work idle_work; > > @@ -1572,6 +1580,18 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); > > void hci_conn_failed(struct hci_conn *conn, u8 status); > > u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle); > > > > +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb); > > +void hci_conn_tx_dequeue(struct hci_conn *conn); > > +void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, > > + const struct sockcm_cookie *sockc); > > + > > +static inline void hci_sockcm_init(struct sockcm_cookie *sockc, struct sock *sk) > > +{ > > + *sockc = (struct sockcm_cookie) { > > + .tsflags = READ_ONCE(sk->sk_tsflags), > > + }; > > +} > > + > > /* > > * hci_conn_get() and hci_conn_put() are used to control the life-time of an > > * "hci_conn" object. They do not guarantee that the hci_conn object is running, > > diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c > > index d097e308a755..95972fd4c784 100644 > > --- a/net/bluetooth/hci_conn.c > > +++ b/net/bluetooth/hci_conn.c > > @@ -27,6 +27,7 @@ > > > > #include <linux/export.h> > > #include <linux/debugfs.h> > > +#include <linux/errqueue.h> > > > > #include <net/bluetooth/bluetooth.h> > > #include <net/bluetooth/hci_core.h> > > @@ -1002,6 +1003,7 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t > > } > > > > skb_queue_head_init(&conn->data_q); > > + skb_queue_head_init(&conn->tx_q.queue); > > > > INIT_LIST_HEAD(&conn->chan_list); > > INIT_LIST_HEAD(&conn->link_list); > > @@ -1155,6 +1157,7 @@ void hci_conn_del(struct hci_conn *conn) > > } > > > > skb_queue_purge(&conn->data_q); > > + skb_queue_purge(&conn->tx_q.queue); > > > > /* Remove the connection from the list and cleanup its remaining > > * state. This is a separate function since for some cases like > > @@ -3064,3 +3067,122 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) > > */ > > return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); > > } > > + > > +void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, > > + const struct sockcm_cookie *sockc) > > +{ > > + struct sock *sk = skb ? skb->sk : NULL; > > + > > + /* This shall be called on a single skb of those generated by user > > + * sendmsg(), and only when the sendmsg() does not return error to > > + * user. This is required for keeping the tskey that increments here in > > + * sync with possible sendmsg() counting by user. > > + * > > + * Stream sockets shall set key_offset to sendmsg() length in bytes > > + * and call with the last fragment, others to 1 and first fragment. > > + */ > > + > > + if (!skb || !sockc || !sk || !key_offset) > > + return; > > + > > + sock_tx_timestamp(sk, sockc, &skb_shinfo(skb)->tx_flags); > > + > > + if (sockc->tsflags & SOF_TIMESTAMPING_OPT_ID && > > + sockc->tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) { > > + if (sockc->tsflags & SOCKCM_FLAG_TS_OPT_ID) { > > + skb_shinfo(skb)->tskey = sockc->ts_opt_id; > > + } else { > > + int key = atomic_add_return(key_offset, &sk->sk_tskey); > > + > > + skb_shinfo(skb)->tskey = key - 1; > > + } > > + } > > +} > > + > > +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) > > +{ > > + struct tx_queue *comp = &conn->tx_q; > > + bool track = false; > > + > > + /* Emit SND now, ie. just before sending to driver */ > > + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) > > + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SND); > > It's a bit strange that SCM_TSTAMP_SND is under the control of > SKBTX_SW_TSTAMP. Can we use the same flag for both lines here > directly? I suppose I would use SKBTX_SW_TSTAMP then. This is more or less open-coded skb_tx_timestamp(), which drivers do before sending to HW, for the Bluetooth case. AFAIK it should be done like this. > > > + > > + /* COMPLETION tstamp is emitted for tracked skb later in Number of > > + * Completed Packets event. Available only for flow controlled cases. > > + * > > + * TODO: SCO support without flowctl (needs to be done in drivers) > > + */ > > + switch (conn->type) { > > + case ISO_LINK: > > + case ACL_LINK: > > + case LE_LINK: > > + break; > > + case SCO_LINK: > > + case ESCO_LINK: > > + if (!hci_dev_test_flag(conn->hdev, HCI_SCO_FLOWCTL)) > > + return; > > + break; > > + default: > > + return; > > + } > > + > > + if (skb->sk && (skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP)) > > + track = true; > > + > > + /* If nothing is tracked, just count extra skbs at the queue head */ > > + if (!track && !comp->tracked) { > > + comp->extra++; > > + return; > > + } > > + > > + if (track) { > > + skb = skb_clone_sk(skb); > > + if (!skb) > > + goto count_only; > > + > > + comp->tracked++; > > + } else { > > + skb = skb_clone(skb, GFP_KERNEL); > > + if (!skb) > > + goto count_only; > > + } > > + > > + skb_queue_tail(&comp->queue, skb); > > + return; > > + > > +count_only: > > + /* Stop tracking skbs, and only count. This will not emit timestamps for > > + * the packets, but if we get here something is more seriously wrong. > > + */ > > + comp->tracked = 0; > > + comp->extra += skb_queue_len(&comp->queue) + 1; > > + skb_queue_purge(&comp->queue); > > +} > > + > > +void hci_conn_tx_dequeue(struct hci_conn *conn) > > +{ > > + struct tx_queue *comp = &conn->tx_q; > > + struct sk_buff *skb; > > + > > + /* If there are tracked skbs, the counted extra go before dequeuing real > > + * skbs, to keep ordering. When nothing is tracked, the ordering doesn't > > + * matter so dequeue real skbs first to get rid of them ASAP. > > + */ > > + if (comp->extra && (comp->tracked || skb_queue_empty(&comp->queue))) { > > + comp->extra--; > > + return; > > + } > > + > > + skb = skb_dequeue(&comp->queue); > > + if (!skb) > > + return; > > + > > + if (skb->sk) { > > + comp->tracked--; > > Need an explicit if statement here? I think no, see explanation of how it works in the reply to Willem: https://lore.kernel.org/linux-bluetooth/5882af942ef8cf5c9b4ce36a348f959807a387b0.camel@iki.fi/ > > + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, > > + SCM_TSTAMP_COMPLETION); > > This is the socket timestamping, and that's right. My minor question > is: for the use of bpf timestamping (that should be easy as you've > done in patch 1, I presume), I'm not sure if you're familiar with it. > If not, I plan to implement it myself in a follow-up patch and then > let you do some tests? Of course, I will provide the bpf test script I saw the BPF timestamping things, but didn't look in full detail yet. I don't know much about BPF, but IIUC, is it just as simple as adding BPF_SOCK_OPS_TSTAMP_COMPLETION_CB and then modifying skb_tstamp_tx_report_bpf_timestamping() accordingly? I think we'd want to add also the BPF tests in the Bluetooth socket timestamping tests https://web.git.kernel.org/pub/scm/bluetooth/bluez.git/tree/doc/test-runner.rst https://web.git.kernel.org/pub/scm/bluetooth/bluez.git/tree/tools/tester.h#n91 https://web.git.kernel.org/pub/scm/bluetooth/bluez.git/tree/tools/iso-tester.c#n2275 https://web.git.kernel.org/pub/scm/bluetooth/bluez.git/tree/tools/sco-tester.c#n755 https://web.git.kernel.org/pub/scm/bluetooth/bluez.git/tree/tools/l2cap-tester.c#n1369 If you could show an example how to setup the BPF tstamps and pass the resulting tstamp data in some way back to the application, that could be very helpful (and I could postpone learning BPF for a little while longer :) > :) > > Thanks, > Jason > > > + } > > + > > + kfree_skb(skb); > > +} > > diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c > > index 94d9147612da..5eb0600bbd03 100644 > > --- a/net/bluetooth/hci_core.c > > +++ b/net/bluetooth/hci_core.c > > @@ -3029,6 +3029,13 @@ static int hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) > > return 0; > > } > > > > +static int hci_send_conn_frame(struct hci_dev *hdev, struct hci_conn *conn, > > + struct sk_buff *skb) > > +{ > > + hci_conn_tx_queue(conn, skb); > > + return hci_send_frame(hdev, skb); > > +} > > + > > /* Send HCI command */ > > int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, > > const void *param) > > @@ -3575,7 +3582,7 @@ static void hci_sched_sco(struct hci_dev *hdev, __u8 type) > > while (*cnt && (conn = hci_low_sent(hdev, type, "e))) { > > while (quote-- && (skb = skb_dequeue(&conn->data_q))) { > > BT_DBG("skb %p len %d", skb, skb->len); > > - hci_send_frame(hdev, skb); > > + hci_send_conn_frame(hdev, conn, skb); > > > > conn->sent++; > > if (conn->sent == ~0) > > @@ -3618,7 +3625,7 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) > > hci_conn_enter_active_mode(chan->conn, > > bt_cb(skb)->force_active); > > > > - hci_send_frame(hdev, skb); > > + hci_send_conn_frame(hdev, chan->conn, skb); > > hdev->acl_last_tx = jiffies; > > > > hdev->acl_cnt--; > > @@ -3674,7 +3681,7 @@ static void hci_sched_le(struct hci_dev *hdev) > > > > skb = skb_dequeue(&chan->data_q); > > > > - hci_send_frame(hdev, skb); > > + hci_send_conn_frame(hdev, chan->conn, skb); > > hdev->le_last_tx = jiffies; > > > > (*cnt)--; > > @@ -3708,7 +3715,7 @@ static void hci_sched_iso(struct hci_dev *hdev) > > while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, "e))) { > > while (quote-- && (skb = skb_dequeue(&conn->data_q))) { > > BT_DBG("skb %p len %d", skb, skb->len); > > - hci_send_frame(hdev, skb); > > + hci_send_conn_frame(hdev, conn, skb); > > > > conn->sent++; > > if (conn->sent == ~0) > > diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c > > index 0df4a0e082c8..83990c975c1f 100644 > > --- a/net/bluetooth/hci_event.c > > +++ b/net/bluetooth/hci_event.c > > @@ -4415,6 +4415,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, > > struct hci_comp_pkts_info *info = &ev->handles[i]; > > struct hci_conn *conn; > > __u16 handle, count; > > + unsigned int i; > > > > handle = __le16_to_cpu(info->handle); > > count = __le16_to_cpu(info->count); > > @@ -4425,6 +4426,9 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, > > > > conn->sent -= count; > > > > + for (i = 0; i < count; ++i) > > + hci_conn_tx_dequeue(conn); > > + > > switch (conn->type) { > > case ACL_LINK: > > hdev->acl_cnt += count; > > -- > > 2.48.1 > > > >
Pauli Virtanen wrote: > Hi, > > ke, 2025-03-19 kello 10:44 -0400, Willem de Bruijn kirjoitti: > > Jason Xing wrote: > > > On Wed, Mar 19, 2025 at 3:10 AM Pauli Virtanen <pav@iki.fi> wrote: > > > > > > > > Support enabling TX timestamping for some skbs, and track them until > > > > packet completion. Generate software SCM_TSTAMP_COMPLETION when getting > > > > completion report from hardware. > > > > > > > > Generate software SCM_TSTAMP_SND before sending to driver. Sending from > > > > driver requires changes in the driver API, and drivers mostly are going > > > > to send the skb immediately. > > > > > > > > Make the default situation with no COMPLETION TX timestamping more > > > > efficient by only counting packets in the queue when there is nothing to > > > > track. When there is something to track, we need to make clones, since > > > > the driver may modify sent skbs. > > > > Why count packets at all? And if useful separate from completions, > > should that be a separate patch? > > This paragraph was commenting on the implementation of struct tx_queue, > and maybe how it works should be explicitly explained somewhere (code > comment?). Here's some explanation of it: > > 1) We have to hang on (clones of) skbs until completion reports for > them arrive, in order to emit COMPLETION timestamps. There's no > existing queue that does this in net/bluetooth (drivers may just copy > data & discard skbs, and they don't know about completion reports), so > something new needs to be added. > > 2) It is only needed for emitting COMPLETION timestamps. So it's better > to not do any extra work (clones etc.) when there are no such > timestamps to be emitted. > > 3) The new queue should work correctly when timestamping is turned on > or off, or only some packets are timestamped. It should also eventually > return to a state where no extra work is done, when new skbs don't > request COMPLETION timestamps. So far, fully understood. > struct tx_queue implements such queue that only "tracks" some skbs. > Logical structure: > > HEAD > <no stored skb> } > <no stored skb> } tx_queue::extra is the number of non-tracked > ... } logical items at queue head > <no stored skb> } > <tracked skb> } tx_queue::queue contains mixture of > <non-tracked skb> } tracked items (skb->sk != NULL) and > <non-tracked skb> } non-tracked items (skb->sk == NULL). > <tracked skb> } These are ordered after the "extra" items. > TAIL > > tx_queue::tracked is the number of tracked skbs in tx_queue::queue. > > hci_conn_tx_queue() determines whether skb is tracked (= COMPLETION > timestamp shall be emitted for it) and pushes a logical item to TAIL. > > hci_conn_tx_dequeue() pops a logical item from HEAD, and emits > timestamp if it corresponds to a tracked skb. > > When tracked == 0, queue() can just increment tx_queue::extra, and > dequeue() can remove any skb from tx_queue::queue, or if empty then > decrement tx_queue::extra. This allows it to return to a state with > empty tx_queue::queue when new skbs no longer request timestamps. > > When tracked != 0, the ordering of items in the queue needs to be > respected strictly, so queue() always pushes real skb (tracked or not) > to TAIL, and dequeue() has to decrement extra to zero, before it can > pop skb from queue head. Thanks. I did not understand why you need to queue or track any sbs aside from those that have SKBTX_COMPLETION_TSTAMP. If I follow correctly this is to be able to associate the tx completion with the right skb on the queue. The usual model in Ethernet drivers is that every tx descriptor (and completion descriptor) in the ring is associated with a pure software ring of metadata structures, which can point to an skb (or NULL). In a pinch, instead the skb on the queue itself could record the descriptor id that it is associated with. But hci_conn_tx_queue is too far removed from the HW, so has no direct access to that. And similarly hci_conn_tx_dequeue has no such low level details. So long story short you indeed have to track this out of band with a separate counter. I also don't immediately see a simpler way. Though you can perhaps replace the skb_clone (not the skb_clone_sk!) with some sentinel value that just helps count?
Hi Pauli, On Wed, Mar 19, 2025 at 1:43 PM Pauli Virtanen <pav@iki.fi> wrote: > > Hi, > > ke, 2025-03-19 kello 10:44 -0400, Willem de Bruijn kirjoitti: > > Jason Xing wrote: > > > On Wed, Mar 19, 2025 at 3:10 AM Pauli Virtanen <pav@iki.fi> wrote: > > > > > > > > Support enabling TX timestamping for some skbs, and track them until > > > > packet completion. Generate software SCM_TSTAMP_COMPLETION when getting > > > > completion report from hardware. > > > > > > > > Generate software SCM_TSTAMP_SND before sending to driver. Sending from > > > > driver requires changes in the driver API, and drivers mostly are going > > > > to send the skb immediately. > > > > > > > > Make the default situation with no COMPLETION TX timestamping more > > > > efficient by only counting packets in the queue when there is nothing to > > > > track. When there is something to track, we need to make clones, since > > > > the driver may modify sent skbs. > > > > Why count packets at all? And if useful separate from completions, > > should that be a separate patch? > > This paragraph was commenting on the implementation of struct tx_queue, > and maybe how it works should be explicitly explained somewhere (code > comment?). Here's some explanation of it: > > 1) We have to hang on (clones of) skbs until completion reports for > them arrive, in order to emit COMPLETION timestamps. There's no > existing queue that does this in net/bluetooth (drivers may just copy > data & discard skbs, and they don't know about completion reports), so > something new needs to be added. > > 2) It is only needed for emitting COMPLETION timestamps. So it's better > to not do any extra work (clones etc.) when there are no such > timestamps to be emitted. > > 3) The new queue should work correctly when timestamping is turned on > or off, or only some packets are timestamped. It should also eventually > return to a state where no extra work is done, when new skbs don't > request COMPLETION timestamps. I don't think it would hurt to put some of the above text as code comments, but I think it would actually be better if we start doing some documentation for Bluetooth in general, or we can put as part of the manpages in userspace though that normally cover only the interface not the internals, but I think it is a good idea to add documentation to the likes of l2cap.rst covering the usage of SO_TIMESTAMPING: https://github.com/bluez/bluez/blob/master/doc/l2cap.rst It is on my TODO to do something similar to SCO and ISO(once it is stable) sockets. > > struct tx_queue implements such queue that only "tracks" some skbs. > Logical structure: > > HEAD > <no stored skb> } > <no stored skb> } tx_queue::extra is the number of non-tracked > ... } logical items at queue head > <no stored skb> } > <tracked skb> } tx_queue::queue contains mixture of > <non-tracked skb> } tracked items (skb->sk != NULL) and > <non-tracked skb> } non-tracked items (skb->sk == NULL). > <tracked skb> } These are ordered after the "extra" items. > TAIL > > tx_queue::tracked is the number of tracked skbs in tx_queue::queue. > > hci_conn_tx_queue() determines whether skb is tracked (= COMPLETION > timestamp shall be emitted for it) and pushes a logical item to TAIL. > > hci_conn_tx_dequeue() pops a logical item from HEAD, and emits > timestamp if it corresponds to a tracked skb. > > When tracked == 0, queue() can just increment tx_queue::extra, and > dequeue() can remove any skb from tx_queue::queue, or if empty then > decrement tx_queue::extra. This allows it to return to a state with > empty tx_queue::queue when new skbs no longer request timestamps. > > When tracked != 0, the ordering of items in the queue needs to be > respected strictly, so queue() always pushes real skb (tracked or not) > to TAIL, and dequeue() has to decrement extra to zero, before it can > pop skb from queue head. > > > > > > +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) > > > > +{ > > > > + struct tx_queue *comp = &conn->tx_q; > > > > + bool track = false; > > > > + > > > > + /* Emit SND now, ie. just before sending to driver */ > > > > + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) > > > > + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SND); > > > > > > It's a bit strange that SCM_TSTAMP_SND is under the control of > > > SKBTX_SW_TSTAMP. Can we use the same flag for both lines here > > > directly? I suppose I would use SKBTX_SW_TSTAMP then. > > > > This is the established behavior. > > > > > > > + > > > > + /* COMPLETION tstamp is emitted for tracked skb later in Number of > > > > + * Completed Packets event. Available only for flow controlled cases. > > > > + * > > > > + * TODO: SCO support without flowctl (needs to be done in drivers) > > > > + */ > > > > + switch (conn->type) { > > > > + case ISO_LINK: > > > > + case ACL_LINK: > > > > + case LE_LINK: > > > > + break; > > > > + case SCO_LINK: > > > > + case ESCO_LINK: > > > > + if (!hci_dev_test_flag(conn->hdev, HCI_SCO_FLOWCTL)) > > > > + return; > > > > + break; > > > > + default: > > > > + return; > > > > + } > > > > + > > > > + if (skb->sk && (skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP)) > > > > + track = true; > > > > + > > > > + /* If nothing is tracked, just count extra skbs at the queue head */ > > > > + if (!track && !comp->tracked) { > > > > + comp->extra++; > > > > + return; > > > > + } > > > > + > > > > + if (track) { > > > > + skb = skb_clone_sk(skb); > > > > + if (!skb) > > > > + goto count_only; > > > > + > > > > + comp->tracked++; > > > > + } else { > > > > + skb = skb_clone(skb, GFP_KERNEL); > > > > + if (!skb) > > > > + goto count_only; > > > > + } > > > > What is the difference between track and comp->tracked > > I hope the answer above is clear. > > -- > Pauli Virtanen
Hi, ke, 2025-03-19 kello 16:00 -0400, Willem de Bruijn kirjoitti: > Pauli Virtanen wrote: > > ke, 2025-03-19 kello 10:44 -0400, Willem de Bruijn kirjoitti: > > > Jason Xing wrote: > > > > On Wed, Mar 19, 2025 at 3:10 AM Pauli Virtanen <pav@iki.fi> wrote: > > > > > > > > > > Support enabling TX timestamping for some skbs, and track them until > > > > > packet completion. Generate software SCM_TSTAMP_COMPLETION when getting > > > > > completion report from hardware. > > > > > > > > > > Generate software SCM_TSTAMP_SND before sending to driver. Sending from > > > > > driver requires changes in the driver API, and drivers mostly are going > > > > > to send the skb immediately. > > > > > > > > > > Make the default situation with no COMPLETION TX timestamping more > > > > > efficient by only counting packets in the queue when there is nothing to > > > > > track. When there is something to track, we need to make clones, since > > > > > the driver may modify sent skbs. > > > > > > Why count packets at all? And if useful separate from completions, > > > should that be a separate patch? > > > > This paragraph was commenting on the implementation of struct tx_queue, > > and maybe how it works should be explicitly explained somewhere (code > > comment?). Here's some explanation of it: > > > > 1) We have to hang on (clones of) skbs until completion reports for > > them arrive, in order to emit COMPLETION timestamps. There's no > > existing queue that does this in net/bluetooth (drivers may just copy > > data & discard skbs, and they don't know about completion reports), so > > something new needs to be added. > > > > 2) It is only needed for emitting COMPLETION timestamps. So it's better > > to not do any extra work (clones etc.) when there are no such > > timestamps to be emitted. > > > > 3) The new queue should work correctly when timestamping is turned on > > or off, or only some packets are timestamped. It should also eventually > > return to a state where no extra work is done, when new skbs don't > > request COMPLETION timestamps. > > So far, fully understood. > > > struct tx_queue implements such queue that only "tracks" some skbs. > > Logical structure: > > > > HEAD > > <no stored skb> } > > <no stored skb> } tx_queue::extra is the number of non-tracked > > ... } logical items at queue head > > <no stored skb> } > > <tracked skb> } tx_queue::queue contains mixture of > > <non-tracked skb> } tracked items (skb->sk != NULL) and > > <non-tracked skb> } non-tracked items (skb->sk == NULL). > > <tracked skb> } These are ordered after the "extra" items. > > TAIL > > > > tx_queue::tracked is the number of tracked skbs in tx_queue::queue. > > > > hci_conn_tx_queue() determines whether skb is tracked (= COMPLETION > > timestamp shall be emitted for it) and pushes a logical item to TAIL. > > > > hci_conn_tx_dequeue() pops a logical item from HEAD, and emits > > timestamp if it corresponds to a tracked skb. > > > > When tracked == 0, queue() can just increment tx_queue::extra, and > > dequeue() can remove any skb from tx_queue::queue, or if empty then > > decrement tx_queue::extra. This allows it to return to a state with > > empty tx_queue::queue when new skbs no longer request timestamps. > > > > When tracked != 0, the ordering of items in the queue needs to be > > respected strictly, so queue() always pushes real skb (tracked or not) > > to TAIL, and dequeue() has to decrement extra to zero, before it can > > pop skb from queue head. > > Thanks. I did not understand why you need to queue or track any > sbs aside from those that have SKBTX_COMPLETION_TSTAMP. > > If I follow correctly this is to be able to associate the tx > completion with the right skb on the queue. Yes, it was done to maintain the queue/dequeue ordering. > The usual model in Ethernet drivers is that every tx descriptor (and > completion descriptor) in the ring is associated with a pure software > ring of metadata structures, which can point to an skb (or NULL). > > In a pinch, instead the skb on the queue itself could record the > descriptor id that it is associated with. But hci_conn_tx_queue is > too far removed from the HW, so has no direct access to that. And > similarly hci_conn_tx_dequeue has no such low level details. > > So long story short you indeed have to track this out of band with > a separate counter. I also don't immediately see a simpler way. > > Though you can perhaps replace the skb_clone (not the skb_clone_sk!) > with some sentinel value that just helps count? It probably could be done a bit smarter, it could eg. use something else than skb_queue. Or, I think we can clobber cb here as the clones are only used for timestamping, so: struct tx_queue { unsigned int pre_items; struct sk_buff_head queue; }; struct tx_queue_cb { unsigned int post_items; }; static void hci_tx_queue_push(struct tx_queue *q, struct sk_buff *skb) { struct tx_queue_cb *cb; /* HEAD * <non-tracked item> } * ... } tx_queue::pre_items of these * <non-tracked item> } * <tracked skb1> <- tx_queue::queue first item * <non-tracked item> } * ... } ((struct tx_queue_cb *)skb1->cb)->post_items * <non-tracked item> } * ... * <tracked skbn> <- tx_queue::queue n-th item * <non-tracked item> } * ... } ((struct tx_queue_cb *)skbn->cb)->post_items * <non-tracked item> } * TAIL */ if (skb) { cb = (struct tx_queue_cb *)skb->cb; cb->post_items = 0; skb_queue_tail(&q->queue, skb); } else { skb = skb_peek_tail(&q->queue); if (skb) { cb = (struct tx_queue_cb *)skb->cb; cb->post_items++; } else { q->pre_items++; } } } static struct sk_buff *hci_tx_queue_pop(struct tx_queue *q) { struct sk_buff *skb; struct tx_queue_cb *cb; if (q->pre_items) { q->pre_items--; return NULL; } skb = skb_dequeue(&q->queue); if (skb) { cb = (struct tx_queue_cb *)skb->cb; q->pre_items += cb->post_items; } return skb; } void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) { /* Emit SND now, ie. just before sending to driver */ if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SND); /* COMPLETION tstamp is emitted for tracked skb later in Number of * Completed Packets event. Available only for flow controlled cases. * * TODO: SCO support without flowctl (needs to be done in drivers) */ switch (conn->type) { case ISO_LINK: case ACL_LINK: case LE_LINK: break; case SCO_LINK: case ESCO_LINK: if (!hci_dev_test_flag(conn->hdev, HCI_SCO_FLOWCTL)) return; break; default: return; } if (skb->sk && (skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP)) skb = skb_clone_sk(skb); else skb = NULL; hci_tx_queue_push(&conn->tx_q, skb); return; } void hci_conn_tx_dequeue(struct hci_conn *conn) { struct sk_buff *skb = hci_tx_queue_pop(&conn->tx_q); if (skb) { __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_COMPLETION); kfree_skb(skb); } }
Pauli Virtanen wrote: > Hi, > > ke, 2025-03-19 kello 16:00 -0400, Willem de Bruijn kirjoitti: > > Pauli Virtanen wrote: > > > ke, 2025-03-19 kello 10:44 -0400, Willem de Bruijn kirjoitti: > > > > Jason Xing wrote: > > > > > On Wed, Mar 19, 2025 at 3:10 AM Pauli Virtanen <pav@iki.fi> wrote: > > > > > > > > > > > > Support enabling TX timestamping for some skbs, and track them until > > > > > > packet completion. Generate software SCM_TSTAMP_COMPLETION when getting > > > > > > completion report from hardware. > > > > > > > > > > > > Generate software SCM_TSTAMP_SND before sending to driver. Sending from > > > > > > driver requires changes in the driver API, and drivers mostly are going > > > > > > to send the skb immediately. > > > > > > > > > > > > Make the default situation with no COMPLETION TX timestamping more > > > > > > efficient by only counting packets in the queue when there is nothing to > > > > > > track. When there is something to track, we need to make clones, since > > > > > > the driver may modify sent skbs. > > > > > > > > Why count packets at all? And if useful separate from completions, > > > > should that be a separate patch? > > > > > > This paragraph was commenting on the implementation of struct tx_queue, > > > and maybe how it works should be explicitly explained somewhere (code > > > comment?). Here's some explanation of it: > > > > > > 1) We have to hang on (clones of) skbs until completion reports for > > > them arrive, in order to emit COMPLETION timestamps. There's no > > > existing queue that does this in net/bluetooth (drivers may just copy > > > data & discard skbs, and they don't know about completion reports), so > > > something new needs to be added. > > > > > > 2) It is only needed for emitting COMPLETION timestamps. So it's better > > > to not do any extra work (clones etc.) when there are no such > > > timestamps to be emitted. > > > > > > 3) The new queue should work correctly when timestamping is turned on > > > or off, or only some packets are timestamped. It should also eventually > > > return to a state where no extra work is done, when new skbs don't > > > request COMPLETION timestamps. > > > > So far, fully understood. > > > > > struct tx_queue implements such queue that only "tracks" some skbs. > > > Logical structure: > > > > > > HEAD > > > <no stored skb> } > > > <no stored skb> } tx_queue::extra is the number of non-tracked > > > ... } logical items at queue head > > > <no stored skb> } > > > <tracked skb> } tx_queue::queue contains mixture of > > > <non-tracked skb> } tracked items (skb->sk != NULL) and > > > <non-tracked skb> } non-tracked items (skb->sk == NULL). > > > <tracked skb> } These are ordered after the "extra" items. > > > TAIL > > > > > > tx_queue::tracked is the number of tracked skbs in tx_queue::queue. > > > > > > hci_conn_tx_queue() determines whether skb is tracked (= COMPLETION > > > timestamp shall be emitted for it) and pushes a logical item to TAIL. > > > > > > hci_conn_tx_dequeue() pops a logical item from HEAD, and emits > > > timestamp if it corresponds to a tracked skb. > > > > > > When tracked == 0, queue() can just increment tx_queue::extra, and > > > dequeue() can remove any skb from tx_queue::queue, or if empty then > > > decrement tx_queue::extra. This allows it to return to a state with > > > empty tx_queue::queue when new skbs no longer request timestamps. > > > > > > When tracked != 0, the ordering of items in the queue needs to be > > > respected strictly, so queue() always pushes real skb (tracked or not) > > > to TAIL, and dequeue() has to decrement extra to zero, before it can > > > pop skb from queue head. > > > > Thanks. I did not understand why you need to queue or track any > > sbs aside from those that have SKBTX_COMPLETION_TSTAMP. > > > > If I follow correctly this is to be able to associate the tx > > completion with the right skb on the queue. > > Yes, it was done to maintain the queue/dequeue ordering. > > > The usual model in Ethernet drivers is that every tx descriptor (and > > completion descriptor) in the ring is associated with a pure software > > ring of metadata structures, which can point to an skb (or NULL). > > > > In a pinch, instead the skb on the queue itself could record the > > descriptor id that it is associated with. But hci_conn_tx_queue is > > too far removed from the HW, so has no direct access to that. And > > similarly hci_conn_tx_dequeue has no such low level details. > > > > So long story short you indeed have to track this out of band with > > a separate counter. I also don't immediately see a simpler way. > > > > Though you can perhaps replace the skb_clone (not the skb_clone_sk!) > > with some sentinel value that just helps count? > > It probably could be done a bit smarter, it could eg. use something > else than skb_queue. Or, I think we can clobber cb here as the clones > are only used for timestamping, so: > > > struct tx_queue { > unsigned int pre_items; > struct sk_buff_head queue; > }; > > struct tx_queue_cb { > unsigned int post_items; > }; > > static void hci_tx_queue_push(struct tx_queue *q, struct sk_buff *skb) > { > struct tx_queue_cb *cb; > > /* HEAD > * <non-tracked item> } > * ... } tx_queue::pre_items of these > * <non-tracked item> } > * <tracked skb1> <- tx_queue::queue first item > * <non-tracked item> } > * ... } ((struct tx_queue_cb *)skb1->cb)->post_items > * <non-tracked item> } > * ... > * <tracked skbn> <- tx_queue::queue n-th item > * <non-tracked item> } > * ... } ((struct tx_queue_cb *)skbn->cb)->post_items > * <non-tracked item> } > * TAIL > */ > if (skb) { > cb = (struct tx_queue_cb *)skb->cb; > cb->post_items = 0; > skb_queue_tail(&q->queue, skb); > } else { > skb = skb_peek_tail(&q->queue); > if (skb) { > cb = (struct tx_queue_cb *)skb->cb; > cb->post_items++; > } else { > q->pre_items++; > } > } > } > > static struct sk_buff *hci_tx_queue_pop(struct tx_queue *q) > { > struct sk_buff *skb; > struct tx_queue_cb *cb; > > if (q->pre_items) { > q->pre_items--; > return NULL; > } > > skb = skb_dequeue(&q->queue); > if (skb) { > cb = (struct tx_queue_cb *)skb->cb; > q->pre_items += cb->post_items; > } > > return skb; > } > > void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) > { > /* Emit SND now, ie. just before sending to driver */ > if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) > __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SND); > > /* COMPLETION tstamp is emitted for tracked skb later in Number of > * Completed Packets event. Available only for flow controlled cases. > * > * TODO: SCO support without flowctl (needs to be done in drivers) > */ > switch (conn->type) { > case ISO_LINK: > case ACL_LINK: > case LE_LINK: > break; > case SCO_LINK: > case ESCO_LINK: > if (!hci_dev_test_flag(conn->hdev, HCI_SCO_FLOWCTL)) > return; > break; > default: > return; > } > > if (skb->sk && (skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP)) > skb = skb_clone_sk(skb); > else > skb = NULL; > > hci_tx_queue_push(&conn->tx_q, skb); > return; > } > > void hci_conn_tx_dequeue(struct hci_conn *conn) > { > struct sk_buff *skb = hci_tx_queue_pop(&conn->tx_q); > > if (skb) { > __skb_tstamp_tx(skb, NULL, NULL, skb->sk, > SCM_TSTAMP_COMPLETION); > kfree_skb(skb); > } > } Neat. To be clear, your call. Just if the expectation is that timestamped packets are rare even when enabled (e.g., due to sampling, or enabling only for one of many sockets), then avoiding the skb_clone in the common case may be worthwhile.
On Thu, Mar 20, 2025 at 2:21 AM Pauli Virtanen <pav@iki.fi> wrote: > > Hi, > > ke, 2025-03-19 kello 08:39 +0800, Jason Xing kirjoitti: > > On Wed, Mar 19, 2025 at 3:10 AM Pauli Virtanen <pav@iki.fi> wrote: > > > > > > Support enabling TX timestamping for some skbs, and track them until > > > packet completion. Generate software SCM_TSTAMP_COMPLETION when getting > > > completion report from hardware. > > > > > > Generate software SCM_TSTAMP_SND before sending to driver. Sending from > > > driver requires changes in the driver API, and drivers mostly are going > > > to send the skb immediately. > > > > > > Make the default situation with no COMPLETION TX timestamping more > > > efficient by only counting packets in the queue when there is nothing to > > > track. When there is something to track, we need to make clones, since > > > the driver may modify sent skbs. > > > > > > The tx_q queue length is bounded by the hdev flow control, which will > > > not send new packets before it has got completion reports for old ones. > > > > > > Signed-off-by: Pauli Virtanen <pav@iki.fi> > > > --- > > > > > > Notes: > > > v5: > > > - Add hci_sockm_init() > > > - Back to decoupled COMPLETION & SND, like in v3 > > > - Handle SCO flow controlled case > > > > > > include/net/bluetooth/hci_core.h | 20 +++++ > > > net/bluetooth/hci_conn.c | 122 +++++++++++++++++++++++++++++++ > > > net/bluetooth/hci_core.c | 15 +++- > > > net/bluetooth/hci_event.c | 4 + > > > 4 files changed, 157 insertions(+), 4 deletions(-) > > > > > > diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h > > > index f78e4298e39a..5115da34f881 100644 > > > --- a/include/net/bluetooth/hci_core.h > > > +++ b/include/net/bluetooth/hci_core.h > > > @@ -261,6 +261,12 @@ struct adv_info { > > > struct delayed_work rpa_expired_cb; > > > }; > > > > > > +struct tx_queue { > > > + struct sk_buff_head queue; > > > + unsigned int extra; > > > + unsigned int tracked; > > > +}; > > > + > > > #define HCI_MAX_ADV_INSTANCES 5 > > > #define HCI_DEFAULT_ADV_DURATION 2 > > > > > > @@ -733,6 +739,8 @@ struct hci_conn { > > > struct sk_buff_head data_q; > > > struct list_head chan_list; > > > > > > + struct tx_queue tx_q; > > > + > > > struct delayed_work disc_work; > > > struct delayed_work auto_accept_work; > > > struct delayed_work idle_work; > > > @@ -1572,6 +1580,18 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); > > > void hci_conn_failed(struct hci_conn *conn, u8 status); > > > u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle); > > > > > > +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb); > > > +void hci_conn_tx_dequeue(struct hci_conn *conn); > > > +void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, > > > + const struct sockcm_cookie *sockc); > > > + > > > +static inline void hci_sockcm_init(struct sockcm_cookie *sockc, struct sock *sk) > > > +{ > > > + *sockc = (struct sockcm_cookie) { > > > + .tsflags = READ_ONCE(sk->sk_tsflags), > > > + }; > > > +} > > > + > > > /* > > > * hci_conn_get() and hci_conn_put() are used to control the life-time of an > > > * "hci_conn" object. They do not guarantee that the hci_conn object is running, > > > diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c > > > index d097e308a755..95972fd4c784 100644 > > > --- a/net/bluetooth/hci_conn.c > > > +++ b/net/bluetooth/hci_conn.c > > > @@ -27,6 +27,7 @@ > > > > > > #include <linux/export.h> > > > #include <linux/debugfs.h> > > > +#include <linux/errqueue.h> > > > > > > #include <net/bluetooth/bluetooth.h> > > > #include <net/bluetooth/hci_core.h> > > > @@ -1002,6 +1003,7 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t > > > } > > > > > > skb_queue_head_init(&conn->data_q); > > > + skb_queue_head_init(&conn->tx_q.queue); > > > > > > INIT_LIST_HEAD(&conn->chan_list); > > > INIT_LIST_HEAD(&conn->link_list); > > > @@ -1155,6 +1157,7 @@ void hci_conn_del(struct hci_conn *conn) > > > } > > > > > > skb_queue_purge(&conn->data_q); > > > + skb_queue_purge(&conn->tx_q.queue); > > > > > > /* Remove the connection from the list and cleanup its remaining > > > * state. This is a separate function since for some cases like > > > @@ -3064,3 +3067,122 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) > > > */ > > > return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); > > > } > > > + > > > +void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, > > > + const struct sockcm_cookie *sockc) > > > +{ > > > + struct sock *sk = skb ? skb->sk : NULL; > > > + > > > + /* This shall be called on a single skb of those generated by user > > > + * sendmsg(), and only when the sendmsg() does not return error to > > > + * user. This is required for keeping the tskey that increments here in > > > + * sync with possible sendmsg() counting by user. > > > + * > > > + * Stream sockets shall set key_offset to sendmsg() length in bytes > > > + * and call with the last fragment, others to 1 and first fragment. > > > + */ > > > + > > > + if (!skb || !sockc || !sk || !key_offset) > > > + return; > > > + > > > + sock_tx_timestamp(sk, sockc, &skb_shinfo(skb)->tx_flags); > > > + > > > + if (sockc->tsflags & SOF_TIMESTAMPING_OPT_ID && > > > + sockc->tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) { > > > + if (sockc->tsflags & SOCKCM_FLAG_TS_OPT_ID) { > > > + skb_shinfo(skb)->tskey = sockc->ts_opt_id; > > > + } else { > > > + int key = atomic_add_return(key_offset, &sk->sk_tskey); > > > + > > > + skb_shinfo(skb)->tskey = key - 1; > > > + } > > > + } > > > +} > > > + > > > +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) > > > +{ > > > + struct tx_queue *comp = &conn->tx_q; > > > + bool track = false; > > > + > > > + /* Emit SND now, ie. just before sending to driver */ > > > + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) > > > + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SND); > > > > It's a bit strange that SCM_TSTAMP_SND is under the control of > > SKBTX_SW_TSTAMP. Can we use the same flag for both lines here > > directly? I suppose I would use SKBTX_SW_TSTAMP then. > > This is more or less open-coded skb_tx_timestamp(), which drivers do > before sending to HW, for the Bluetooth case. AFAIK it should be done > like this. You're right. Sorry for misreading this part yesterday somehow... > > > > > > + > > > + /* COMPLETION tstamp is emitted for tracked skb later in Number of > > > + * Completed Packets event. Available only for flow controlled cases. > > > + * > > > + * TODO: SCO support without flowctl (needs to be done in drivers) > > > + */ > > > + switch (conn->type) { > > > + case ISO_LINK: > > > + case ACL_LINK: > > > + case LE_LINK: > > > + break; > > > + case SCO_LINK: > > > + case ESCO_LINK: > > > + if (!hci_dev_test_flag(conn->hdev, HCI_SCO_FLOWCTL)) > > > + return; > > > + break; > > > + default: > > > + return; > > > + } > > > + > > > + if (skb->sk && (skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP)) > > > + track = true; > > > + > > > + /* If nothing is tracked, just count extra skbs at the queue head */ > > > + if (!track && !comp->tracked) { > > > + comp->extra++; > > > + return; > > > + } > > > + > > > + if (track) { > > > + skb = skb_clone_sk(skb); > > > + if (!skb) > > > + goto count_only; > > > + > > > + comp->tracked++; > > > + } else { > > > + skb = skb_clone(skb, GFP_KERNEL); > > > + if (!skb) > > > + goto count_only; > > > + } > > > + > > > + skb_queue_tail(&comp->queue, skb); > > > + return; > > > + > > > +count_only: > > > + /* Stop tracking skbs, and only count. This will not emit timestamps for > > > + * the packets, but if we get here something is more seriously wrong. > > > + */ > > > + comp->tracked = 0; > > > + comp->extra += skb_queue_len(&comp->queue) + 1; > > > + skb_queue_purge(&comp->queue); > > > +} > > > + > > > +void hci_conn_tx_dequeue(struct hci_conn *conn) > > > +{ > > > + struct tx_queue *comp = &conn->tx_q; > > > + struct sk_buff *skb; > > > + > > > + /* If there are tracked skbs, the counted extra go before dequeuing real > > > + * skbs, to keep ordering. When nothing is tracked, the ordering doesn't > > > + * matter so dequeue real skbs first to get rid of them ASAP. > > > + */ > > > + if (comp->extra && (comp->tracked || skb_queue_empty(&comp->queue))) { > > > + comp->extra--; > > > + return; > > > + } > > > + > > > + skb = skb_dequeue(&comp->queue); > > > + if (!skb) > > > + return; > > > + > > > + if (skb->sk) { > > > + comp->tracked--; > > > > Need an explicit if statement here? > > I think no, see explanation of how it works in the reply to Willem: > https://lore.kernel.org/linux-bluetooth/5882af942ef8cf5c9b4ce36a348f959807a387b0.camel@iki.fi/ > > > > + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, > > > + SCM_TSTAMP_COMPLETION); > > > > This is the socket timestamping, and that's right. My minor question > > is: for the use of bpf timestamping (that should be easy as you've > > done in patch 1, I presume), I'm not sure if you're familiar with it. > > If not, I plan to implement it myself in a follow-up patch and then > > let you do some tests? Of course, I will provide the bpf test script > > I saw the BPF timestamping things, but didn't look in full detail yet. > I don't know much about BPF, but IIUC, is it just as simple as adding > BPF_SOCK_OPS_TSTAMP_COMPLETION_CB and then modifying > skb_tstamp_tx_report_bpf_timestamping() accordingly? > > I think we'd want to add also the BPF tests in the Bluetooth socket > timestamping tests Good to know that! > > https://web.git.kernel.org/pub/scm/bluetooth/bluez.git/tree/doc/test-runner.rst > https://web.git.kernel.org/pub/scm/bluetooth/bluez.git/tree/tools/tester.h#n91 > https://web.git.kernel.org/pub/scm/bluetooth/bluez.git/tree/tools/iso-tester.c#n2275 > https://web.git.kernel.org/pub/scm/bluetooth/bluez.git/tree/tools/sco-tester.c#n755 > https://web.git.kernel.org/pub/scm/bluetooth/bluez.git/tree/tools/l2cap-tester.c#n1369 > > If you could show an example how to setup the BPF tstamps and pass the > resulting tstamp data in some way back to the application, that could > be very helpful (and I could postpone learning BPF for a little while > longer :) Sure, I can leave it to you as long as you want to take over it :) A simple case where I re-support the bpf extension for socket timestamping goes like this commit [1]. And the selftest can be seen here[2]. A rough thought is that letting it pass in skb_tstamp_tx_report_bpf_timestamping() and adding corresponding BPF flag as you mentioned before are the key to do it on top of the current series. It should not be that hard. If you have any questions on how we move on about this point, please feel free to reach out to me anytime. [1]: https://web.git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit/?id=6b98ec7e882af1c3088a88757e2226d06c8514f9 [2]: https://web.git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit/?id=f4924aec58dd9e14779f4bc11a6bf3a830a42a6c Note: you will compile tools/testing/selftests/bpf/ and run ./test_progs -t net_timestamp to see how it works. Thanks, Jason > > > :) > > > > Thanks, > > Jason > > > > > + } > > > + > > > + kfree_skb(skb); > > > +} > > > diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c > > > index 94d9147612da..5eb0600bbd03 100644 > > > --- a/net/bluetooth/hci_core.c > > > +++ b/net/bluetooth/hci_core.c > > > @@ -3029,6 +3029,13 @@ static int hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) > > > return 0; > > > } > > > > > > +static int hci_send_conn_frame(struct hci_dev *hdev, struct hci_conn *conn, > > > + struct sk_buff *skb) > > > +{ > > > + hci_conn_tx_queue(conn, skb); > > > + return hci_send_frame(hdev, skb); > > > +} > > > + > > > /* Send HCI command */ > > > int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, > > > const void *param) > > > @@ -3575,7 +3582,7 @@ static void hci_sched_sco(struct hci_dev *hdev, __u8 type) > > > while (*cnt && (conn = hci_low_sent(hdev, type, "e))) { > > > while (quote-- && (skb = skb_dequeue(&conn->data_q))) { > > > BT_DBG("skb %p len %d", skb, skb->len); > > > - hci_send_frame(hdev, skb); > > > + hci_send_conn_frame(hdev, conn, skb); > > > > > > conn->sent++; > > > if (conn->sent == ~0) > > > @@ -3618,7 +3625,7 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) > > > hci_conn_enter_active_mode(chan->conn, > > > bt_cb(skb)->force_active); > > > > > > - hci_send_frame(hdev, skb); > > > + hci_send_conn_frame(hdev, chan->conn, skb); > > > hdev->acl_last_tx = jiffies; > > > > > > hdev->acl_cnt--; > > > @@ -3674,7 +3681,7 @@ static void hci_sched_le(struct hci_dev *hdev) > > > > > > skb = skb_dequeue(&chan->data_q); > > > > > > - hci_send_frame(hdev, skb); > > > + hci_send_conn_frame(hdev, chan->conn, skb); > > > hdev->le_last_tx = jiffies; > > > > > > (*cnt)--; > > > @@ -3708,7 +3715,7 @@ static void hci_sched_iso(struct hci_dev *hdev) > > > while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, "e))) { > > > while (quote-- && (skb = skb_dequeue(&conn->data_q))) { > > > BT_DBG("skb %p len %d", skb, skb->len); > > > - hci_send_frame(hdev, skb); > > > + hci_send_conn_frame(hdev, conn, skb); > > > > > > conn->sent++; > > > if (conn->sent == ~0) > > > diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c > > > index 0df4a0e082c8..83990c975c1f 100644 > > > --- a/net/bluetooth/hci_event.c > > > +++ b/net/bluetooth/hci_event.c > > > @@ -4415,6 +4415,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, > > > struct hci_comp_pkts_info *info = &ev->handles[i]; > > > struct hci_conn *conn; > > > __u16 handle, count; > > > + unsigned int i; > > > > > > handle = __le16_to_cpu(info->handle); > > > count = __le16_to_cpu(info->count); > > > @@ -4425,6 +4426,9 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, > > > > > > conn->sent -= count; > > > > > > + for (i = 0; i < count; ++i) > > > + hci_conn_tx_dequeue(conn); > > > + > > > switch (conn->type) { > > > case ACL_LINK: > > > hdev->acl_cnt += count; > > > -- > > > 2.48.1 > > > > > > > > -- > Pauli Virtanen
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f78e4298e39a..5115da34f881 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -261,6 +261,12 @@ struct adv_info { struct delayed_work rpa_expired_cb; }; +struct tx_queue { + struct sk_buff_head queue; + unsigned int extra; + unsigned int tracked; +}; + #define HCI_MAX_ADV_INSTANCES 5 #define HCI_DEFAULT_ADV_DURATION 2 @@ -733,6 +739,8 @@ struct hci_conn { struct sk_buff_head data_q; struct list_head chan_list; + struct tx_queue tx_q; + struct delayed_work disc_work; struct delayed_work auto_accept_work; struct delayed_work idle_work; @@ -1572,6 +1580,18 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); void hci_conn_failed(struct hci_conn *conn, u8 status); u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle); +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb); +void hci_conn_tx_dequeue(struct hci_conn *conn); +void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, + const struct sockcm_cookie *sockc); + +static inline void hci_sockcm_init(struct sockcm_cookie *sockc, struct sock *sk) +{ + *sockc = (struct sockcm_cookie) { + .tsflags = READ_ONCE(sk->sk_tsflags), + }; +} + /* * hci_conn_get() and hci_conn_put() are used to control the life-time of an * "hci_conn" object. They do not guarantee that the hci_conn object is running, diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index d097e308a755..95972fd4c784 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -27,6 +27,7 @@ #include <linux/export.h> #include <linux/debugfs.h> +#include <linux/errqueue.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -1002,6 +1003,7 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t } skb_queue_head_init(&conn->data_q); + skb_queue_head_init(&conn->tx_q.queue); INIT_LIST_HEAD(&conn->chan_list); INIT_LIST_HEAD(&conn->link_list); @@ -1155,6 +1157,7 @@ void hci_conn_del(struct hci_conn *conn) } skb_queue_purge(&conn->data_q); + skb_queue_purge(&conn->tx_q.queue); /* Remove the connection from the list and cleanup its remaining * state. This is a separate function since for some cases like @@ -3064,3 +3067,122 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) */ return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); } + +void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, + const struct sockcm_cookie *sockc) +{ + struct sock *sk = skb ? skb->sk : NULL; + + /* This shall be called on a single skb of those generated by user + * sendmsg(), and only when the sendmsg() does not return error to + * user. This is required for keeping the tskey that increments here in + * sync with possible sendmsg() counting by user. + * + * Stream sockets shall set key_offset to sendmsg() length in bytes + * and call with the last fragment, others to 1 and first fragment. + */ + + if (!skb || !sockc || !sk || !key_offset) + return; + + sock_tx_timestamp(sk, sockc, &skb_shinfo(skb)->tx_flags); + + if (sockc->tsflags & SOF_TIMESTAMPING_OPT_ID && + sockc->tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) { + if (sockc->tsflags & SOCKCM_FLAG_TS_OPT_ID) { + skb_shinfo(skb)->tskey = sockc->ts_opt_id; + } else { + int key = atomic_add_return(key_offset, &sk->sk_tskey); + + skb_shinfo(skb)->tskey = key - 1; + } + } +} + +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) +{ + struct tx_queue *comp = &conn->tx_q; + bool track = false; + + /* Emit SND now, ie. just before sending to driver */ + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SND); + + /* COMPLETION tstamp is emitted for tracked skb later in Number of + * Completed Packets event. Available only for flow controlled cases. + * + * TODO: SCO support without flowctl (needs to be done in drivers) + */ + switch (conn->type) { + case ISO_LINK: + case ACL_LINK: + case LE_LINK: + break; + case SCO_LINK: + case ESCO_LINK: + if (!hci_dev_test_flag(conn->hdev, HCI_SCO_FLOWCTL)) + return; + break; + default: + return; + } + + if (skb->sk && (skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP)) + track = true; + + /* If nothing is tracked, just count extra skbs at the queue head */ + if (!track && !comp->tracked) { + comp->extra++; + return; + } + + if (track) { + skb = skb_clone_sk(skb); + if (!skb) + goto count_only; + + comp->tracked++; + } else { + skb = skb_clone(skb, GFP_KERNEL); + if (!skb) + goto count_only; + } + + skb_queue_tail(&comp->queue, skb); + return; + +count_only: + /* Stop tracking skbs, and only count. This will not emit timestamps for + * the packets, but if we get here something is more seriously wrong. + */ + comp->tracked = 0; + comp->extra += skb_queue_len(&comp->queue) + 1; + skb_queue_purge(&comp->queue); +} + +void hci_conn_tx_dequeue(struct hci_conn *conn) +{ + struct tx_queue *comp = &conn->tx_q; + struct sk_buff *skb; + + /* If there are tracked skbs, the counted extra go before dequeuing real + * skbs, to keep ordering. When nothing is tracked, the ordering doesn't + * matter so dequeue real skbs first to get rid of them ASAP. + */ + if (comp->extra && (comp->tracked || skb_queue_empty(&comp->queue))) { + comp->extra--; + return; + } + + skb = skb_dequeue(&comp->queue); + if (!skb) + return; + + if (skb->sk) { + comp->tracked--; + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, + SCM_TSTAMP_COMPLETION); + } + + kfree_skb(skb); +} diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 94d9147612da..5eb0600bbd03 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3029,6 +3029,13 @@ static int hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) return 0; } +static int hci_send_conn_frame(struct hci_dev *hdev, struct hci_conn *conn, + struct sk_buff *skb) +{ + hci_conn_tx_queue(conn, skb); + return hci_send_frame(hdev, skb); +} + /* Send HCI command */ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param) @@ -3575,7 +3582,7 @@ static void hci_sched_sco(struct hci_dev *hdev, __u8 type) while (*cnt && (conn = hci_low_sent(hdev, type, "e))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); - hci_send_frame(hdev, skb); + hci_send_conn_frame(hdev, conn, skb); conn->sent++; if (conn->sent == ~0) @@ -3618,7 +3625,7 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) hci_conn_enter_active_mode(chan->conn, bt_cb(skb)->force_active); - hci_send_frame(hdev, skb); + hci_send_conn_frame(hdev, chan->conn, skb); hdev->acl_last_tx = jiffies; hdev->acl_cnt--; @@ -3674,7 +3681,7 @@ static void hci_sched_le(struct hci_dev *hdev) skb = skb_dequeue(&chan->data_q); - hci_send_frame(hdev, skb); + hci_send_conn_frame(hdev, chan->conn, skb); hdev->le_last_tx = jiffies; (*cnt)--; @@ -3708,7 +3715,7 @@ static void hci_sched_iso(struct hci_dev *hdev) while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, "e))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); - hci_send_frame(hdev, skb); + hci_send_conn_frame(hdev, conn, skb); conn->sent++; if (conn->sent == ~0) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0df4a0e082c8..83990c975c1f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4415,6 +4415,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, struct hci_comp_pkts_info *info = &ev->handles[i]; struct hci_conn *conn; __u16 handle, count; + unsigned int i; handle = __le16_to_cpu(info->handle); count = __le16_to_cpu(info->count); @@ -4425,6 +4426,9 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, conn->sent -= count; + for (i = 0; i < count; ++i) + hci_conn_tx_dequeue(conn); + switch (conn->type) { case ACL_LINK: hdev->acl_cnt += count;
Support enabling TX timestamping for some skbs, and track them until packet completion. Generate software SCM_TSTAMP_COMPLETION when getting completion report from hardware. Generate software SCM_TSTAMP_SND before sending to driver. Sending from driver requires changes in the driver API, and drivers mostly are going to send the skb immediately. Make the default situation with no COMPLETION TX timestamping more efficient by only counting packets in the queue when there is nothing to track. When there is something to track, we need to make clones, since the driver may modify sent skbs. The tx_q queue length is bounded by the hdev flow control, which will not send new packets before it has got completion reports for old ones. Signed-off-by: Pauli Virtanen <pav@iki.fi> --- Notes: v5: - Add hci_sockm_init() - Back to decoupled COMPLETION & SND, like in v3 - Handle SCO flow controlled case include/net/bluetooth/hci_core.h | 20 +++++ net/bluetooth/hci_conn.c | 122 +++++++++++++++++++++++++++++++ net/bluetooth/hci_core.c | 15 +++- net/bluetooth/hci_event.c | 4 + 4 files changed, 157 insertions(+), 4 deletions(-)