aboutgitcodebugslistschat
diff options
context:
space:
mode:
authorDavid Gibson <david@gibson.dropbear.id.au>2025-03-17 20:24:16 +1100
committerStefano Brivio <sbrivio@redhat.com>2025-03-20 20:33:09 +0100
commita41d6d125eca5ac8c54bed8157098be141557b03 (patch)
tree9ee7d475734d798733cb566823df67eaa305a8ce
parente43e00719d7701301e4bc4fb179dc7adff175409 (diff)
downloadpasst-a41d6d125eca5ac8c54bed8157098be141557b03.tar
passt-a41d6d125eca5ac8c54bed8157098be141557b03.tar.gz
passt-a41d6d125eca5ac8c54bed8157098be141557b03.tar.bz2
passt-a41d6d125eca5ac8c54bed8157098be141557b03.tar.lz
passt-a41d6d125eca5ac8c54bed8157098be141557b03.tar.xz
passt-a41d6d125eca5ac8c54bed8157098be141557b03.tar.zst
passt-a41d6d125eca5ac8c54bed8157098be141557b03.zip
tap: Make size of pool_tap[46] purely a tuning parameter
Currently we attempt to size pool_tap[46] so they have room for the maximum possible number of packets that could fit in pkt_buf (TAP_MSGS). However, the calculation isn't quite correct: TAP_MSGS is based on ETH_ZLEN (60) as the minimum possible L2 frame size. But ETH_ZLEN is based on physical constraints of Ethernet, which don't apply to our virtual devices. It is possible to generate a legitimate frame smaller than this, for example an empty payload UDP/IPv4 frame on the 'pasta' backend is only 42 bytes long. Further more, the same limit applies for vhost-user, which is not limited by the size of pkt_buf like the other backends. In that case we don't even have full control of the maximum buffer size, so we can't really calculate how many packets could fit in there. If we exceed do TAP_MSGS we'll drop packets, not just use more batches, which is moderately bad. The fact that this needs to be sized just so for correctness not merely for tuning is a fairly non-obvious coupling between different parts of the code. To make this more robust, alter the tap code so it doesn't rely on everything fitting in a single batch of TAP_MSGS packets, instead breaking into multiple batches as necessary. This leaves TAP_MSGS as purely a tuning parameter, which we can freely adjust based on performance measures. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r--packet.c13
-rw-r--r--packet.h3
-rw-r--r--passt.h2
-rw-r--r--tap.c19
-rw-r--r--tap.h3
-rw-r--r--vu_common.c5
6 files changed, 36 insertions, 9 deletions
diff --git a/packet.c b/packet.c
index d1a51a5..08076d5 100644
--- a/packet.c
+++ b/packet.c
@@ -68,6 +68,17 @@ static int packet_check_range(const struct pool *p, const char *ptr, size_t len,
return 0;
}
/**
+ * pool_full() - Is a packet pool full?
+ * @p: Pointer to packet pool
+ *
+ * Return: true if the pool is full, false if more packets can be added
+ */
+bool pool_full(const struct pool *p)
+{
+ return p->count >= p->size;
+}
+
+/**
* packet_add_do() - Add data as packet descriptor to given pool
* @p: Existing pool
* @len: Length of new descriptor
@@ -80,7 +91,7 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
{
size_t idx = p->count;
- if (idx >= p->size) {
+ if (pool_full(p)) {
trace("add packet index %zu to pool with size %zu, %s:%i",
idx, p->size, func, line);
return;
diff --git a/packet.h b/packet.h
index d099f02..dd18461 100644
--- a/packet.h
+++ b/packet.h
@@ -6,6 +6,8 @@
#ifndef PACKET_H
#define PACKET_H
+#include <stdbool.h>
+
/* Maximum size of a single packet stored in pool, including headers */
#define PACKET_MAX_LEN UINT16_MAX
@@ -33,6 +35,7 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
void *packet_get_do(const struct pool *p, const size_t idx,
size_t offset, size_t len, size_t *left,
const char *func, int line);
+bool pool_full(const struct pool *p);
void pool_flush(struct pool *p);
#define packet_add(p, len, start) \
diff --git a/passt.h b/passt.h
index 8f45091..8693794 100644
--- a/passt.h
+++ b/passt.h
@@ -71,8 +71,6 @@ static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data),
/* Large enough for ~128 maximum size frames */
#define PKT_BUF_BYTES (8UL << 20)
-#define TAP_MSGS \
- DIV_ROUND_UP(PKT_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
extern char pkt_buf [PKT_BUF_BYTES];
diff --git a/tap.c b/tap.c
index 182a115..34e6774 100644
--- a/tap.c
+++ b/tap.c
@@ -75,6 +75,9 @@ CHECK_FRAME_LEN(L2_MAX_LEN_PASTA);
CHECK_FRAME_LEN(L2_MAX_LEN_PASST);
CHECK_FRAME_LEN(L2_MAX_LEN_VU);
+#define TAP_MSGS \
+ DIV_ROUND_UP(sizeof(pkt_buf), ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
+
/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
@@ -1042,8 +1045,10 @@ void tap_handler(struct ctx *c, const struct timespec *now)
* @c: Execution context
* @l2len: Total L2 packet length
* @p: Packet buffer
+ * @now: Current timestamp
*/
-void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
+void tap_add_packet(struct ctx *c, ssize_t l2len, char *p,
+ const struct timespec *now)
{
const struct ethhdr *eh;
@@ -1059,9 +1064,17 @@ void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
switch (ntohs(eh->h_proto)) {
case ETH_P_ARP:
case ETH_P_IP:
+ if (pool_full(pool_tap4)) {
+ tap4_handler(c, pool_tap4, now);
+ pool_flush(pool_tap4);
+ }
packet_add(pool_tap4, l2len, p);
break;
case ETH_P_IPV6:
+ if (pool_full(pool_tap6)) {
+ tap6_handler(c, pool_tap6, now);
+ pool_flush(pool_tap6);
+ }
packet_add(pool_tap6, l2len, p);
break;
default:
@@ -1142,7 +1155,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
p += sizeof(uint32_t);
n -= sizeof(uint32_t);
- tap_add_packet(c, l2len, p);
+ tap_add_packet(c, l2len, p, now);
p += l2len;
n -= l2len;
@@ -1207,7 +1220,7 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
len > (ssize_t)L2_MAX_LEN_PASTA)
continue;
- tap_add_packet(c, len, pkt_buf + n);
+ tap_add_packet(c, len, pkt_buf + n, now);
}
tap_handler(c, now);
diff --git a/tap.h b/tap.h
index dd39fd8..6fe3d15 100644
--- a/tap.h
+++ b/tap.h
@@ -119,6 +119,7 @@ void tap_sock_update_pool(void *base, size_t size);
void tap_backend_init(struct ctx *c);
void tap_flush_pools(void);
void tap_handler(struct ctx *c, const struct timespec *now);
-void tap_add_packet(struct ctx *c, ssize_t l2len, char *p);
+void tap_add_packet(struct ctx *c, ssize_t l2len, char *p,
+ const struct timespec *now);
#endif /* TAP_H */
diff --git a/vu_common.c b/vu_common.c
index cefe5e2..5e6fd4a 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -195,7 +195,7 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
tap_add_packet(vdev->context,
elem[count].out_sg[0].iov_len - hdrlen,
(char *)elem[count].out_sg[0].iov_base +
- hdrlen);
+ hdrlen, now);
} else {
/* vnet header can be in a separate iovec */
if (elem[count].out_num != 2) {
@@ -207,7 +207,8 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
} else {
tap_add_packet(vdev->context,
elem[count].out_sg[1].iov_len,
- (char *)elem[count].out_sg[1].iov_base);
+ (char *)elem[count].out_sg[1].iov_base,
+ now);
}
}