aboutgitcodebugslistschat
diff options
context:
space:
mode:
authorDavid Gibson <david@gibson.dropbear.id.au>2025-03-17 20:24:17 +1100
committerStefano Brivio <sbrivio@redhat.com>2025-03-20 20:33:12 +0100
commit9866d146e654975dd7f5fd3f1294d5fc4628cef3 (patch)
treed2f9f9030aaefeefe844f88cdd74715d74fc0dc6
parenta41d6d125eca5ac8c54bed8157098be141557b03 (diff)
downloadpasst-9866d146e654975dd7f5fd3f1294d5fc4628cef3.tar
passt-9866d146e654975dd7f5fd3f1294d5fc4628cef3.tar.gz
passt-9866d146e654975dd7f5fd3f1294d5fc4628cef3.tar.bz2
passt-9866d146e654975dd7f5fd3f1294d5fc4628cef3.tar.lz
passt-9866d146e654975dd7f5fd3f1294d5fc4628cef3.tar.xz
passt-9866d146e654975dd7f5fd3f1294d5fc4628cef3.tar.zst
passt-9866d146e654975dd7f5fd3f1294d5fc4628cef3.zip
tap: Clarify calculation of TAP_MSGS
The rationale behind the calculation of TAP_MSGS isn't necessarily obvious. It's supposed to be the maximum number of packets that can fit in pkt_buf. However, the calculation is wrong in several ways: * It's based on ETH_ZLEN which isn't meaningful for virtual devices * It always includes the qemu socket header which isn't used for pasta * The size of pkt_buf isn't relevant for vhost-user We've already made sure this is just a tuning parameter, not a hard limit. Clarify what we're calculating here and why. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r--tap.c28
1 files changed, 22 insertions, 6 deletions
diff --git a/tap.c b/tap.c
index 34e6774..3a6fcbe 100644
--- a/tap.c
+++ b/tap.c
@@ -75,12 +75,28 @@ CHECK_FRAME_LEN(L2_MAX_LEN_PASTA);
CHECK_FRAME_LEN(L2_MAX_LEN_PASST);
CHECK_FRAME_LEN(L2_MAX_LEN_VU);
-#define TAP_MSGS \
- DIV_ROUND_UP(sizeof(pkt_buf), ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
+/* We try size the packet pools so that we can use a single batch for the entire
+ * packet buffer. This might be exceeded for vhost-user, though, which uses its
+ * own buffers rather than pkt_buf.
+ *
+ * This is just a tuning parameter, the code will work with slightly more
+ * overhead if it's incorrect. So, we estimate based on the minimum practical
+ * frame size - an empty UDP datagram - rather than the minimum theoretical
+ * frame size.
+ *
+ * FIXME: Profile to work out how big this actually needs to be to amortise
+ * per-batch syscall overheads
+ */
+#define TAP_MSGS_IP4 \
+ DIV_ROUND_UP(sizeof(pkt_buf), \
+ ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr))
+#define TAP_MSGS_IP6 \
+ DIV_ROUND_UP(sizeof(pkt_buf), \
+ ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
-static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
-static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
+static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS_IP4, pkt_buf);
+static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS_IP6, pkt_buf);
#define TAP_SEQS 128 /* Different L4 tuples in one batch */
#define FRAGMENT_MSG_RATE 10 /* # seconds between fragment warnings */
@@ -1418,8 +1434,8 @@ void tap_sock_update_pool(void *base, size_t size)
{
int i;
- pool_tap4_storage = PACKET_INIT(pool_tap4, TAP_MSGS, base, size);
- pool_tap6_storage = PACKET_INIT(pool_tap6, TAP_MSGS, base, size);
+ pool_tap4_storage = PACKET_INIT(pool_tap4, TAP_MSGS_IP4, base, size);
+ pool_tap6_storage = PACKET_INIT(pool_tap6, TAP_MSGS_IP6, base, size);
for (i = 0; i < TAP_SEQS; i++) {
tap4_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, base, size);