bluez5: deal with missing TX timestamps

There are known controller firmware bugs that cause packet completion
reports, mainly for ISO packets, to be missing.

To avoid getting stuck e.g. in ISO queue flushing, we should consider a
packet completed if sufficient time has passed even if controller (and
kernel) don't report it completed. Take 1 s as conservative timeout, the
expected values are some ms.

These firmware bugs also cause kernel to stop sending packets if too
many are left uncompleted, but we cannot detect that.
This commit is contained in:
Pauli Virtanen 2026-01-11 14:25:23 +02:00
parent 0869be11f8
commit d0309b4e1e
3 changed files with 111 additions and 36 deletions

View File

@ -12,6 +12,7 @@
#include <linux/sockios.h>
#include <spa/utils/defs.h>
#include <spa/utils/ratelimit.h>
#include <spa/support/log.h>
#include "defs.h"
@ -39,10 +40,16 @@ struct spa_bt_latency
size_t unsent;
struct {
int64_t send[64];
size_t size[64];
struct {
int64_t send;
uint32_t pos;
size_t size;
bool snd;
bool completion;
} pending[64];
uint32_t pos;
int64_t prev_tx;
struct spa_ratelimit rate_limit;
} impl;
};
@ -55,6 +62,9 @@ static inline void spa_bt_latency_init(struct spa_bt_latency *lat, struct spa_bt
spa_zero(*lat);
lat->impl.rate_limit.interval = 5 * 60 * SPA_NSEC_PER_SEC;
lat->impl.rate_limit.burst = 8;
if (!transport->device->adapter->tx_timestamping_supported)
return;
@ -81,6 +91,33 @@ static inline void spa_bt_latency_reset(struct spa_bt_latency *lat)
spa_bt_ptp_init(&lat->ptp, lat->ptp.period, lat->ptp.period / 2);
}
static inline void spa_bt_latency_clear_pending(struct spa_bt_latency *lat, unsigned int i,
bool snd, bool completion)
{
i = i % SPA_N_ELEMENTS(lat->impl.pending);
if (snd && lat->impl.pending[i].snd) {
if (lat->kernel_queue)
lat->kernel_queue--;
lat->impl.pending[i].snd = false;
}
if (completion && lat->impl.pending[i].completion) {
if (lat->unsent > lat->impl.pending[i].size)
lat->unsent -= lat->impl.pending[i].size;
else
lat->unsent = 0;
if (lat->queue > 0)
lat->queue--;
if (!lat->queue)
lat->unsent = 0;
lat->impl.pending[i].completion = false;
}
}
static inline ssize_t spa_bt_send(int fd, const void *buf, size_t size,
struct spa_bt_latency *lat, uint64_t now)
{
@ -90,12 +127,16 @@ static inline ssize_t spa_bt_send(int fd, const void *buf, size_t size,
return res;
if (res >= 0) {
lat->impl.send[lat->impl.pos] = now;
lat->impl.size[lat->impl.pos] = size;
lat->impl.pos++;
if (lat->impl.pos >= SPA_N_ELEMENTS(lat->impl.send))
lat->impl.pos = 0;
uint32_t i = lat->impl.pos % SPA_N_ELEMENTS(lat->impl.pending);
spa_bt_latency_clear_pending(lat, i, true, true);
lat->impl.pending[i].send = now;
lat->impl.pending[i].size = size;
lat->impl.pending[i].pos = lat->impl.pos % UINT16_MAX;
lat->impl.pending[i].snd = true;
lat->impl.pending[i].completion = true;
lat->impl.pos++;
lat->queue++;
lat->kernel_queue++;
lat->unsent += size;
@ -104,12 +145,14 @@ static inline ssize_t spa_bt_send(int fd, const void *buf, size_t size,
return res;
}
static inline int spa_bt_latency_recv_errqueue(struct spa_bt_latency *lat, int fd, struct spa_log *log)
static inline int spa_bt_latency_recv_errqueue(struct spa_bt_latency *lat, int fd, int64_t now,
struct spa_log *log)
{
union {
char buf[CMSG_SPACE(32 * sizeof(struct scm_timestamping))];
struct cmsghdr align;
} control;
unsigned int i;
if (!lat->enabled)
return -EOPNOTSUPP;
@ -126,8 +169,9 @@ static inline int spa_bt_latency_recv_errqueue(struct spa_bt_latency *lat, int f
.msg_controllen = sizeof(control.buf),
};
struct cmsghdr *cmsg;
struct scm_timestamping *tss = NULL;
struct sock_extended_err *serr = NULL;
bool have_tss = false, have_serr = false;
struct scm_timestamping tss;
struct sock_extended_err serr;
int res;
res = recvmsg(fd, &msg, MSG_ERRQUEUE | MSG_DONTWAIT);
@ -138,47 +182,49 @@ static inline int spa_bt_latency_recv_errqueue(struct spa_bt_latency *lat, int f
}
for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_TIMESTAMPING)
tss = (void *)CMSG_DATA(cmsg);
else if (cmsg->cmsg_level == SOL_BLUETOOTH && cmsg->cmsg_type == BT_SCM_ERROR)
serr = (void *)CMSG_DATA(cmsg);
else
if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_TIMESTAMPING) {
memcpy(&tss, CMSG_DATA(cmsg), sizeof(tss));
have_tss = true;
} else if (cmsg->cmsg_level == SOL_BLUETOOTH && cmsg->cmsg_type == BT_SCM_ERROR) {
memcpy(&serr, CMSG_DATA(cmsg), sizeof(serr));
have_serr = true;
} else {
continue;
}
}
if (!tss || !serr || serr->ee_errno != ENOMSG || serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING)
return -EINVAL;
if (!have_tss || !have_serr || serr.ee_errno != ENOMSG || serr.ee_origin != SO_EE_ORIGIN_TIMESTAMPING)
continue;
switch (serr->ee_info) {
uint32_t tx_pos = serr.ee_data % SPA_N_ELEMENTS(lat->impl.pending);
if (serr.ee_data % UINT16_MAX != lat->impl.pending[tx_pos].pos) {
spa_log_debug(log, "fd:%d latency[%u] bad value %u", fd, tx_pos, serr.ee_data);
continue;
}
switch (serr.ee_info) {
case SCM_TSTAMP_SND:
if (lat->kernel_queue)
lat->kernel_queue--;
spa_bt_latency_clear_pending(lat, tx_pos, true, false);
continue;
case NEW_SCM_TSTAMP_COMPLETION:
if (!lat->impl.pending[tx_pos].completion)
continue;
break;
default:
continue;
}
struct timespec *ts = &tss->ts[0];
struct timespec *ts = &tss.ts[0];
int64_t tx_time = SPA_TIMESPEC_TO_NSEC(ts);
uint32_t tx_pos = serr->ee_data % SPA_N_ELEMENTS(lat->impl.send);
lat->value = tx_time - lat->impl.send[tx_pos];
if (lat->unsent > lat->impl.size[tx_pos])
lat->unsent -= lat->impl.size[tx_pos];
else
lat->unsent = 0;
lat->value = tx_time - lat->impl.pending[tx_pos].send;
if (lat->impl.prev_tx && tx_time > lat->impl.prev_tx)
spa_bt_ptp_update(&lat->ptp, lat->value, tx_time - lat->impl.prev_tx);
lat->impl.prev_tx = tx_time;
if (lat->queue > 0)
lat->queue--;
if (!lat->queue)
lat->unsent = 0;
spa_bt_latency_clear_pending(lat, tx_pos, false, true);
spa_log_trace(log, "fd:%d latency[%d] nsec:%"PRIu64" range:%d..%d ms",
fd, tx_pos, lat->value,
@ -186,6 +232,27 @@ static inline int spa_bt_latency_recv_errqueue(struct spa_bt_latency *lat, int f
(int)(spa_bt_ptp_valid(&lat->ptp) ? lat->ptp.max / SPA_NSEC_PER_MSEC : -1));
} while (true);
/* Clear too old pending latencies. Controllers (eg. Intel AX210, Realtek 8761CU)
* have known firmware bugs where they fail to report ISO packet completions. This
* will cause completion timestamps to be missing, so we should try to recover
* from this. (Kernel as of v6.18 will eventually stop sending though as it will
* think buffers are full.)
*/
for (i = 0; i < SPA_N_ELEMENTS(lat->impl.pending); ++i) {
if (lat->impl.pending[i].snd || lat->impl.pending[i].completion) {
if (lat->impl.pending[i].send + SPA_NSEC_PER_SEC < now) {
int suppressed;
if ((suppressed = spa_ratelimit_test(&lat->impl.rate_limit, now)) >= 0)
spa_log_warn(log, "Missing completion reports for packet (%d suppressed): "
"Bluetooth adapter firmware bug?", suppressed);
spa_log_trace(log, "fd:%d latency[%u] too late", fd, i);
spa_bt_latency_clear_pending(lat, i, true, true);
}
}
}
lat->valid = spa_bt_ptp_valid(&lat->ptp);
return 0;
@ -200,7 +267,7 @@ static inline void spa_bt_latency_flush(struct spa_bt_latency *lat, int fd, stru
/* Disable timestamping and flush errqueue */
setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping, sizeof(so_timestamping));
spa_bt_latency_recv_errqueue(lat, fd, log);
spa_bt_latency_recv_errqueue(lat, fd, 0, log);
lat->enabled = false;
}

View File

@ -258,6 +258,7 @@ static void group_on_timeout(struct spa_source *source)
bool resync = false;
bool fail = false;
uint64_t exp;
uint64_t now_realtime;
int res;
if ((res = spa_system_timerfd_read(group->data_system, group->timerfd, &exp)) < 0) {
@ -269,6 +270,8 @@ static void group_on_timeout(struct spa_source *source)
if (!exp)
return;
now_realtime = get_time_ns(group->data_system, CLOCK_REALTIME);
spa_list_for_each(stream, &group->streams, link) {
if (!stream->ready)
goto done;
@ -283,7 +286,7 @@ static void group_on_timeout(struct spa_source *source)
continue;
}
spa_bt_latency_recv_errqueue(&stream->tx_latency, stream->fd, group->log);
spa_bt_latency_recv_errqueue(&stream->tx_latency, stream->fd, now_realtime, group->log);
if (stream->this.need_resync) {
resync = true;
@ -649,6 +652,7 @@ int spa_bt_iso_io_recv_errqueue(struct spa_bt_iso_io *this)
{
struct stream *stream = SPA_CONTAINER_OF(this, struct stream, this);
struct group *group = stream->group;
uint64_t now_realtime;
if (!stream->sink) {
struct stream *s;
@ -661,7 +665,8 @@ int spa_bt_iso_io_recv_errqueue(struct spa_bt_iso_io *this)
}
}
return spa_bt_latency_recv_errqueue(&stream->tx_latency, stream->fd, group->log);
now_realtime = get_time_ns(group->data_system, CLOCK_REALTIME);
return spa_bt_latency_recv_errqueue(&stream->tx_latency, stream->fd, now_realtime, group->log);
}
/**

View File

@ -1315,7 +1315,10 @@ static void media_on_flush_error(struct spa_source *source)
if (spa_bt_iso_io_recv_errqueue(this->transport->iso_io) == 0)
return;
} else {
if (spa_bt_latency_recv_errqueue(&this->tx_latency, this->flush_source.fd, this->log) == 0)
struct timespec ts;
spa_system_clock_gettime(this->data_system, CLOCK_REALTIME, &ts);
if (spa_bt_latency_recv_errqueue(&this->tx_latency, this->flush_source.fd, SPA_TIMESPEC_TO_NSEC(&ts), this->log) == 0)
return;
}