diff options
| author | Laurent Vivier <lvivier@redhat.com> | 2025-10-21 23:01:13 +0200 |
|---|---|---|
| committer | Stefano Brivio <sbrivio@redhat.com> | 2025-10-30 15:32:50 +0100 |
| commit | dd5302dd7bf518aa2c50a9819ee06ea2d6fd0061 (patch) | |
| tree | 3bad20da87579b02e9f848ebbbfbc89b51ea4370 | |
| parent | 05972c7c4daf0b2479a415bf7240944b999d9081 (diff) | |
| download | passt-dd5302dd7bf518aa2c50a9819ee06ea2d6fd0061.tar passt-dd5302dd7bf518aa2c50a9819ee06ea2d6fd0061.tar.gz passt-dd5302dd7bf518aa2c50a9819ee06ea2d6fd0061.tar.bz2 passt-dd5302dd7bf518aa2c50a9819ee06ea2d6fd0061.tar.lz passt-dd5302dd7bf518aa2c50a9819ee06ea2d6fd0061.tar.xz passt-dd5302dd7bf518aa2c50a9819ee06ea2d6fd0061.tar.zst passt-dd5302dd7bf518aa2c50a9819ee06ea2d6fd0061.zip | |
tcp, flow: Replace per-connection in_epoll flag with an epollid in flow_common
The in_epoll boolean flag in tcp_tap_conn and tcp_splice_conn only tracked
whether a connection was registered with epoll, not which epoll instance.
This limited flexibility for future multi-epoll support.
Replace the boolean with an epollid field in flow_common that identifies
which epoll instance the flow is registered with.
Use FLOW_EPOLLID_INVALID to indicate when a flow is not registered with
any epoll instance. An epoll_id_to_fd[] mapping table translates
epoll ids to their corresponding epoll file descriptors.
Add helper functions:
- flow_in_epoll() to check if a flow is registered with epoll
- flow_epollfd() to retrieve the epoll fd for a flow's thread
- flow_epollid_register() to register an epoll fd with an epollid
- flow_epollid_set() to set the epollid of a flow
- flow_epollid_clear() to reset the epoll id of a flow
This change also simplifies tcp_timer_ctl() and conn_flag_do() by removing
the need to pass the context 'c', since the epoll fd is now directly
accessible from the flow structure via flow_epollfd().
Add a defensive check at the beginning of tcp_flow_repair_queue() to
avoid a false positive with "make clang-tidy":
error: The 1st argument to 'send' is < 0 but should be >= 0
3230 | ssize_t rc = send(conn->sock, p, MIN(len, chunk), 0);
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
| -rw-r--r-- | flow.c | 61 | ||||
| -rw-r--r-- | flow.h | 17 | ||||
| -rw-r--r-- | passt.c | 1 | ||||
| -rw-r--r-- | tcp.c | 44 | ||||
| -rw-r--r-- | tcp_conn.h | 8 | ||||
| -rw-r--r-- | tcp_splice.c | 24 |
6 files changed, 116 insertions, 39 deletions
@@ -116,6 +116,7 @@ static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES, unsigned flow_first_free; union flow flowtab[FLOW_MAX]; static const union flow *flow_new_entry; /* = NULL */ +static int epoll_id_to_fd[EPOLLFD_ID_MAX]; /* Hash table to index it */ #define FLOW_HASH_LOAD 70 /* % */ @@ -348,6 +349,63 @@ static void flow_set_state(struct flow_common *f, enum flow_state state) } /** + * flow_in_epoll() - Check if flow is registered with an epoll instance + * @f: Flow to check + * + * Return: true if flow is registered with epoll, false otherwise + */ +bool flow_in_epoll(const struct flow_common *f) +{ + return f->epollid != EPOLLFD_ID_INVALID; +} + +/** + * flow_epollfd() - Get the epoll file descriptor for a flow + * @f: Flow to query + * + * Return: epoll file descriptor associated with the flow's thread + */ +int flow_epollfd(const struct flow_common *f) +{ + ASSERT(f->epollid < EPOLLFD_ID_MAX); + + return epoll_id_to_fd[f->epollid]; +} + +/** + * flow_epollid_set() - Associate a flow with an epoll id + * @f: Flow to update + * @epollid: epoll id to associate with this flow + */ +void flow_epollid_set(struct flow_common *f, int epollid) +{ + ASSERT(epollid < EPOLLFD_ID_MAX); + + f->epollid = epollid; +} + +/** + * flow_epollid_clear() - Clear the flow epoll id + * @f: Flow to update + */ +void flow_epollid_clear(struct flow_common *f) +{ + f->epollid = EPOLLFD_ID_INVALID; +} + +/** + * flow_epollid_register() - Initialize the epoll id -> fd mapping + * @epollid: epoll id to associate to + * @epollfd: epoll file descriptor for this epoll id + */ +void flow_epollid_register(int epollid, int epollfd) +{ + ASSERT(epollid < EPOLLFD_ID_MAX); + + epoll_id_to_fd[epollid] = epollfd; +} + +/** * flow_initiate_() - Move flow to INI, setting pif[INISIDE] * @flow: Flow to change state * @pif: pif of the initiating side @@ -550,6 +608,7 @@ union flow *flow_alloc(void) flow_new_entry = flow; memset(flow, 0, sizeof(*flow)); + flow_epollid_clear(&flow->f); flow_set_state(&flow->f, FLOW_STATE_NEW); return flow; @@ -829,7 +888,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now) case FLOW_TCP_SPLICE: closed = tcp_splice_flow_defer(&flow->tcp_splice); if (!closed && timer) - tcp_splice_timer(c, &flow->tcp_splice); + tcp_splice_timer(&flow->tcp_splice); break; case FLOW_PING4: case FLOW_PING6: @@ -177,7 +177,8 @@ int flowside_connect(const struct ctx *c, int s, * @type: Type of packet flow * @pif[]: Interface for each side of the flow * @side[]: Information for each side of the flow - * @tap_omac: MAC address of remote endpoint as seen from the guest + * @tap_omac: MAC address of remote endpoint as seen from the guest + * @epollid: epollfd identifier, or EPOLLFD_ID_INVALID */ struct flow_common { #ifdef __GNUC__ @@ -193,9 +194,18 @@ struct flow_common { #endif uint8_t pif[SIDES]; struct flowside side[SIDES]; + uint8_t tap_omac[6]; + +#define EPOLLFD_ID_BITS 8 + unsigned int epollid:EPOLLFD_ID_BITS; }; +#define EPOLLFD_ID_DEFAULT 0 +#define EPOLLFD_ID_SIZE (1 << EPOLLFD_ID_BITS) +#define EPOLLFD_ID_MAX (EPOLLFD_ID_SIZE - 1) +#define EPOLLFD_ID_INVALID EPOLLFD_ID_MAX + #define FLOW_INDEX_BITS 17 /* 128k - 1 */ #define FLOW_MAX MAX_FROM_BITS(FLOW_INDEX_BITS) @@ -251,6 +261,11 @@ flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif, union flow; void flow_init(void); +bool flow_in_epoll(const struct flow_common *f); +int flow_epollfd(const struct flow_common *f); +void flow_epollid_set(struct flow_common *f, int epollid); +void flow_epollid_clear(struct flow_common *f); +void flow_epollid_register(int epollid, int epollfd); void flow_defer_handler(const struct ctx *c, const struct timespec *now); int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage, int fd); @@ -286,6 +286,7 @@ int main(int argc, char **argv) c.epollfd = epoll_create1(EPOLL_CLOEXEC); if (c.epollfd == -1) die_perror("Failed to create epoll file descriptor"); + flow_epollid_register(EPOLLFD_ID_DEFAULT, c.epollfd); if (getrlimit(RLIMIT_NOFILE, &limit)) die_perror("Failed to get maximum value of open files limit"); @@ -504,25 +504,27 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags) */ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn) { - int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; + int m = flow_in_epoll(&conn->f) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; union epoll_ref ref = { .type = EPOLL_TYPE_TCP, .fd = conn->sock, .flowside = FLOW_SIDX(conn, !TAPSIDE(conn)), }; struct epoll_event ev = { .data.u64 = ref.u64 }; + int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f) + : c->epollfd; if (conn->events == CLOSED) { - if (conn->in_epoll) - epoll_del(c->epollfd, conn->sock); + if (flow_in_epoll(&conn->f)) + epoll_del(epollfd, conn->sock); if (conn->timer != -1) - epoll_del(c->epollfd, conn->timer); + epoll_del(epollfd, conn->timer); return 0; } ev.events = tcp_conn_epoll_events(conn->events, conn->flags); - if (epoll_ctl(c->epollfd, m, conn->sock, &ev)) + if (epoll_ctl(epollfd, m, conn->sock, &ev)) return -errno; - conn->in_epoll = true; + flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT); if (conn->timer != -1) { union epoll_ref ref_t = { .type = EPOLL_TYPE_TCP_TIMER, @@ -531,7 +533,8 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn) struct epoll_event ev_t = { .data.u64 = ref_t.u64, .events = EPOLLIN | EPOLLET }; - if (epoll_ctl(c->epollfd, EPOLL_CTL_MOD, conn->timer, &ev_t)) + if (epoll_ctl(flow_epollfd(&conn->f), EPOLL_CTL_MOD, + conn->timer, &ev_t)) return -errno; } @@ -540,12 +543,11 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn) /** * tcp_timer_ctl() - Set timerfd based on flags/events, create timerfd if needed - * @c: Execution context * @conn: Connection pointer * * #syscalls timerfd_create timerfd_settime */ -static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn) +static void tcp_timer_ctl(struct tcp_tap_conn *conn) { struct itimerspec it = { { 0 }, { 0 } }; @@ -558,6 +560,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn) .flow = FLOW_IDX(conn) }; struct epoll_event ev = { .data.u64 = ref.u64, .events = EPOLLIN | EPOLLET }; + int epollfd = flow_epollfd(&conn->f); int fd; fd = timerfd_create(CLOCK_MONOTONIC, 0); @@ -570,7 +573,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn) } conn->timer = fd; - if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, conn->timer, &ev)) { + if (epoll_ctl(epollfd, EPOLL_CTL_ADD, conn->timer, &ev)) { flow_dbg_perror(conn, "failed to add timer"); close(conn->timer); conn->timer = -1; @@ -628,7 +631,7 @@ void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn, * flags and factor this into the logic below. */ if (flag == ACK_FROM_TAP_DUE) - tcp_timer_ctl(c, conn); + tcp_timer_ctl(conn); return; } @@ -644,7 +647,7 @@ void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn, if (flag == ACK_FROM_TAP_DUE || flag == ACK_TO_TAP_DUE || (flag == ~ACK_FROM_TAP_DUE && (conn->flags & ACK_TO_TAP_DUE)) || (flag == ~ACK_TO_TAP_DUE && (conn->flags & ACK_FROM_TAP_DUE))) - tcp_timer_ctl(c, conn); + tcp_timer_ctl(conn); } /** @@ -699,7 +702,7 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn, tcp_epoll_ctl(c, conn); if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) - tcp_timer_ctl(c, conn); + tcp_timer_ctl(conn); } /** @@ -1767,7 +1770,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn, seq, conn->seq_from_tap); tcp_send_flag(c, conn, ACK); - tcp_timer_ctl(c, conn); + tcp_timer_ctl(conn); if (p->count == 1) { tcp_tap_window_update(c, conn, @@ -2418,7 +2421,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref) if (conn->flags & ACK_TO_TAP_DUE) { tcp_send_flag(c, conn, ACK_IF_NEEDED); - tcp_timer_ctl(c, conn); + tcp_timer_ctl(conn); } else if (conn->flags & ACK_FROM_TAP_DUE) { if (!(conn->events & ESTABLISHED)) { flow_dbg(conn, "handshake timeout"); @@ -2440,7 +2443,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref) return; tcp_data_from_sock(c, conn); - tcp_timer_ctl(c, conn); + tcp_timer_ctl(conn); } } else { struct itimerspec new = { { 0 }, { ACT_TIMEOUT, 0 } }; @@ -3235,6 +3238,11 @@ static int tcp_flow_repair_queue(const struct tcp_tap_conn *conn, size_t chunk = len; uint8_t *p = buf; + if (conn->sock < 0) { + flow_err(conn, "Invalid socket descriptor for repair queue"); + return -EBADF; + } + while (len > 0) { ssize_t rc = send(conn->sock, p, MIN(len, chunk), 0); @@ -3488,7 +3496,7 @@ int tcp_flow_migrate_source_ext(const struct ctx *c, if (c->migrate_no_linger) close(s); else - epoll_del(c->epollfd, s); + epoll_del(flow_epollfd(&conn->f), s); /* Adjustments unrelated to FIN segments: sequence numbers we dumped are * based on the end of the queues. @@ -3637,7 +3645,7 @@ static int tcp_flow_repair_connect(const struct ctx *c, return rc; } - conn->in_epoll = 0; + flow_epollid_clear(&conn->f); conn->timer = -1; conn->listening_sock = -1; @@ -12,7 +12,6 @@ /** * struct tcp_tap_conn - Descriptor for a TCP connection (not spliced) * @f: Generic flow information - * @in_epoll: Is the connection in the epoll set? * @retrans: Number of retransmissions occurred due to ACK_TIMEOUT * @ws_from_tap: Window scaling factor advertised from tap/guest * @ws_to_tap: Window scaling factor advertised to tap/guest @@ -36,8 +35,6 @@ struct tcp_tap_conn { /* Must be first element */ struct flow_common f; - bool in_epoll :1; - #define TCP_RETRANS_BITS 3 unsigned int retrans :TCP_RETRANS_BITS; #define TCP_MAX_RETRANS MAX_FROM_BITS(TCP_RETRANS_BITS) @@ -196,7 +193,6 @@ struct tcp_tap_transfer_ext { * @written: Bytes written (not fully written from one other side read) * @events: Events observed/actions performed on connection * @flags: Connection flags (attributes, not events) - * @in_epoll: Is the connection in the epoll set? */ struct tcp_splice_conn { /* Must be first element */ @@ -220,8 +216,6 @@ struct tcp_splice_conn { #define RCVLOWAT_SET(sidei_) ((sidei_) ? BIT(1) : BIT(0)) #define RCVLOWAT_ACT(sidei_) ((sidei_) ? BIT(3) : BIT(2)) #define CLOSING BIT(4) - - bool in_epoll :1; }; /* Socket pools */ @@ -245,7 +239,7 @@ int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd bool tcp_flow_is_established(const struct tcp_tap_conn *conn); bool tcp_splice_flow_defer(struct tcp_splice_conn *conn); -void tcp_splice_timer(const struct ctx *c, struct tcp_splice_conn *conn); +void tcp_splice_timer(struct tcp_splice_conn *conn); int tcp_conn_pool_sock(int pool[]); int tcp_conn_sock(sa_family_t af); int tcp_sock_refill_pool(int pool[], sa_family_t af); diff --git a/tcp_splice.c b/tcp_splice.c index 6f21184..5efd859 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -149,7 +149,9 @@ static void tcp_splice_conn_epoll_events(uint16_t events, static int tcp_splice_epoll_ctl(const struct ctx *c, struct tcp_splice_conn *conn) { - int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; + int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f) + : c->epollfd; + int m = flow_in_epoll(&conn->f) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; const union epoll_ref ref[SIDES] = { { .type = EPOLL_TYPE_TCP_SPLICE, .fd = conn->s[0], .flowside = FLOW_SIDX(conn, 0) }, @@ -161,25 +163,24 @@ static int tcp_splice_epoll_ctl(const struct ctx *c, tcp_splice_conn_epoll_events(conn->events, ev); - if (epoll_ctl(c->epollfd, m, conn->s[0], &ev[0]) || - epoll_ctl(c->epollfd, m, conn->s[1], &ev[1])) { + + if (epoll_ctl(epollfd, m, conn->s[0], &ev[0]) || + epoll_ctl(epollfd, m, conn->s[1], &ev[1])) { int ret = -errno; flow_perror(conn, "ERROR on epoll_ctl()"); return ret; } - - conn->in_epoll = true; + flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT); return 0; } /** * conn_flag_do() - Set/unset given flag, log, update epoll on CLOSING flag - * @c: Execution context * @conn: Connection pointer * @flag: Flag to set, or ~flag to unset */ -static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn, +static void conn_flag_do(struct tcp_splice_conn *conn, unsigned long flag) { if (flag & (flag - 1)) { @@ -204,15 +205,15 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn, } if (flag == CLOSING) { - epoll_del(c->epollfd, conn->s[0]); - epoll_del(c->epollfd, conn->s[1]); + epoll_del(flow_epollfd(&conn->f), conn->s[0]); + epoll_del(flow_epollfd(&conn->f), conn->s[1]); } } #define conn_flag(c, conn, flag) \ do { \ flow_trace(conn, "flag at %s:%i", __func__, __LINE__); \ - conn_flag_do(c, conn, flag); \ + conn_flag_do(conn, flag); \ } while (0) /** @@ -751,10 +752,9 @@ void tcp_splice_init(struct ctx *c) /** * tcp_splice_timer() - Timer for spliced connections - * @c: Execution context * @conn: Connection to handle */ -void tcp_splice_timer(const struct ctx *c, struct tcp_splice_conn *conn) +void tcp_splice_timer(struct tcp_splice_conn *conn) { unsigned sidei; |
