aboutgitcodebugslistschat
diff options
context:
space:
mode:
author David Gibson <david@gibson.dropbear.id.au> 2024-02-28 22:25:10 +1100
committer Stefano Brivio <sbrivio@redhat.com> 2024-02-29 09:48:01 +0100
commit 0f938c3b9a9dbc854c1d2e33fab5af41b4a660c8 (patch)
tree 787aef747d60b8aacb3e1d472aeac8b69994ad8c
parent d0550f97cd2f38c74806c10904341584f0c0a2ad (diff)
download passt-0f938c3b9a9dbc854c1d2e33fab5af41b4a660c8.tar
passt-0f938c3b9a9dbc854c1d2e33fab5af41b4a660c8.tar.gz
passt-0f938c3b9a9dbc854c1d2e33fab5af41b4a660c8.tar.bz2
passt-0f938c3b9a9dbc854c1d2e33fab5af41b4a660c8.tar.lz
passt-0f938c3b9a9dbc854c1d2e33fab5af41b4a660c8.tar.xz
passt-0f938c3b9a9dbc854c1d2e33fab5af41b4a660c8.tar.zst
passt-0f938c3b9a9dbc854c1d2e33fab5af41b4a660c8.zip
flow: Clarify flow entry life cycle, introduce uniform logging
Our allocation scheme for flow entries means there are some non-obvious constraints on what can be done with an entry, and when. Add a big doc comment explaining the life cycle.

In addition, make a FLOW_START() macro to mark one of the important transitions. This encourages correct usage, by making it natural to only access the flow type specific structure after calling it. It also logs that a new flow has been created, which is useful for debugging.

We also add logging when a flow's life cycle ends. This doesn't need a new helper, because it can only happen either from flow_alloc_cancel() or from the flow deferred handler.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rw-r--r-- flow.c 77
-rw-r--r-- flow.h 5
-rw-r--r-- tcp.c 15
-rw-r--r-- tcp_splice.c 11
-rw-r--r-- tcp_splice.h 5
5 files changed, 95 insertions, 18 deletions
diff --git a/flow.c b/flow.c
index beb9749..d7974d5 100644
--- a/flow.c
+++ b/flow.c
@@ -35,6 +35,46 @@ static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
/* Global Flow Table */
/**
+ * DOC: Theory of Operation - flow entry life cycle
+ *
+ * An individual flow table entry moves through these logical states, usually in
+ * this order.
+ *
+ * FREE - Part of the general pool of free flow table entries
+ * Operations:
+ * - flow_alloc() finds an entry and moves it to ALLOC state
+ *
+ * ALLOC - A tentatively allocated entry
+ * Operations:
+ * - flow_alloc_cancel() returns the entry to FREE state
+ * - FLOW_START() sets the entry's type and moves it to START state
+ * Caveats:
+ * - It's not safe to write fields in the flow entry
+ * - It's not safe to allocate further entries with flow_alloc()
+ * - It's not safe to return to the main epoll loop (use FLOW_START()
+ * to move to START state before doing so)
+ * - It's not safe to use flow_*() logging functions
+ *
+ * START - An entry being prepared by flow type specific code
+ * Operations:
+ * - Flow type specific fields may be accessed
+ * - flow_*() logging functions
+ * - flow_alloc_cancel() returns the entry to FREE state
+ * Caveats:
+ * - Returning to the main epoll loop or allocating another entry
+ * with flow_alloc() implicitly moves the entry to ACTIVE state.
+ *
+ * ACTIVE - An active flow entry managed by flow type specific code
+ * Operations:
+ * - Flow type specific fields may be accessed
+ * - flow_*() logging functions
+ * - Flow may be expired by returning 'true' from flow type specific
+ * deferred or timer handler. This will return it to FREE state.
+ * Caveats:
+ * - It's not safe to call flow_alloc_cancel()
+ */
+
+/**
* DOC: Theory of Operation - allocating and freeing flow entries
*
* Flows are entries in flowtab[]. We need to routinely scan the whole table to
@@ -110,6 +150,39 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
}
/**
+ * flow_start() - Set flow type for new flow and log
+ * @flow: Flow to set type for
+ * @type: Type for new flow
+ * @iniside: Which side initiated the new flow (accepted, but not used by
+ * the current body)
+ *
+ * Return: @flow
+ *
+ * Should be called before setting any flow type specific fields in the flow
+ * table entry.
+ *
+ * Stores @type in flow->f.type, then logs "START <type name>" through
+ * flow_dbg(), looking the name up in flow_type_str[].
+ */
+union flow *flow_start(union flow *flow, enum flow_type type,
+ unsigned iniside)
+{
+ (void)iniside; /* parameter is unused here; the cast marks that as intentional */
+ flow->f.type = type; /* set the type first: the log line below reads it back */
+ flow_dbg(flow, "START %s", flow_type_str[flow->f.type]);
+ return flow;
+}
+
+/**
+ * flow_end() - Clear flow type for finished flow and log
+ * @flow: Flow to clear
+ *
+ * If the entry's type is already FLOW_TYPE_NONE this does nothing and logs
+ * nothing. Otherwise it logs "END <type name>" through flow_dbg() and then
+ * resets flow->f.type to FLOW_TYPE_NONE.
+ */
+static void flow_end(union flow *flow)
+{
+ if (flow->f.type == FLOW_TYPE_NONE)
+ return; /* Nothing to do */
+
+ flow_dbg(flow, "END %s", flow_type_str[flow->f.type]); /* log before the type is cleared */
+ flow->f.type = FLOW_TYPE_NONE;
+}
+
+/**
* flow_alloc() - Allocate a new flow
*
* Return: pointer to an unused flow entry, or NULL if the table is full
@@ -157,7 +230,7 @@ void flow_alloc_cancel(union flow *flow)
{
ASSERT(flow_first_free > FLOW_IDX(flow));
- flow->f.type = FLOW_TYPE_NONE;
+ flow_end(flow);
/* Put it back in a length 1 free cluster, don't attempt to fully
* reverse flow_alloc()s steps. This will get folded together the next
* time flow_defer_handler() runs anyway */
@@ -227,7 +300,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
}
if (closed) {
- flow->f.type = FLOW_TYPE_NONE;
+ flow_end(flow);
if (free_head) {
/* Add slot to current free cluster */
diff --git a/flow.h b/flow.h
index e9b3ce3..8b66751 100644
--- a/flow.h
+++ b/flow.h
@@ -45,6 +45,11 @@ struct flow_common {
#define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */
#define FLOW_FILE_PRESSURE 30 /* % of c->nofile */
+union flow *flow_start(union flow *flow, enum flow_type type,
+ unsigned iniside); /* sets the flow's type, logs, and returns the same flow pointer */
+#define FLOW_START(flow_, t_, var_, i_) \
+ (&flow_start((flow_), (t_), (i_))->var_) /* calls flow_start(), then yields the address of union member var_ */
+
/**
* struct flow_sidx - ID for one side of a specific flow
* @side: Side referenced (0 or 1)
diff --git a/tcp.c b/tcp.c
index e8f4da4..91163b8 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1976,8 +1976,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
goto cancel;
}
- conn = &flow->tcp;
- conn->f.type = FLOW_TCP;
+ conn = FLOW_START(flow, FLOW_TCP, tcp, TAPSIDE);
conn->sock = s;
conn->timer = -1;
conn_event(c, conn, TAP_SYN_RCVD);
@@ -2681,18 +2680,19 @@ static void tcp_snat_inbound(const struct ctx *c, union inany_addr *addr)
* tcp_tap_conn_from_sock() - Initialize state for non-spliced connection
* @c: Execution context
* @ref: epoll reference of listening socket
- * @conn: connection structure to initialize
+ * @flow: flow to initialise
* @s: Accepted socket
* @sa: Peer socket address (from accept())
* @now: Current timestamp
*/
static void tcp_tap_conn_from_sock(struct ctx *c,
union tcp_listen_epoll_ref ref,
- struct tcp_tap_conn *conn, int s,
+ union flow *flow, int s,
const union sockaddr_inany *sa,
const struct timespec *now)
{
- conn->f.type = FLOW_TCP;
+ struct tcp_tap_conn *conn = FLOW_START(flow, FLOW_TCP, tcp, SOCKSIDE);
+
conn->sock = s;
conn->timer = -1;
conn->ws_to_tap = conn->ws_from_tap = 0;
@@ -2738,11 +2738,10 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
goto cancel;
if (c->mode == MODE_PASTA &&
- tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice,
- s, &sa))
+ tcp_splice_conn_from_sock(c, ref.tcp_listen, flow, s, &sa))
return;
- tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s, &sa, now);
+ tcp_tap_conn_from_sock(c, ref.tcp_listen, flow, s, &sa, now);
return;
cancel:
diff --git a/tcp_splice.c b/tcp_splice.c
index 49585f2..2411a94 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -432,7 +432,7 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
* tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection
* @c: Execution context
* @ref: epoll reference of listening socket
- * @conn: connection structure to initialize
+ * @flow: flow to initialise
* @s: Accepted socket
* @sa: Peer address of connection
*
@@ -440,10 +440,10 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
* #syscalls:pasta setsockopt
*/
bool tcp_splice_conn_from_sock(const struct ctx *c,
- union tcp_listen_epoll_ref ref,
- struct tcp_splice_conn *conn, int s,
- const union sockaddr_inany *sa)
+ union tcp_listen_epoll_ref ref, union flow *flow,
+ int s, const union sockaddr_inany *sa)
{
+ struct tcp_splice_conn *conn;
union inany_addr aany;
in_port_t port;
@@ -453,7 +453,8 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
if (!inany_is_loopback(&aany))
return false;
- conn->f.type = FLOW_TCP_SPLICE;
+ conn = FLOW_START(flow, FLOW_TCP_SPLICE, tcp_splice, 0);
+
conn->flags = inany_v4(&aany) ? 0 : SPLICE_V6;
conn->s[0] = s;
conn->s[1] = -1;
diff --git a/tcp_splice.h b/tcp_splice.h
index 20f41b3..5a471af 100644
--- a/tcp_splice.h
+++ b/tcp_splice.h
@@ -12,9 +12,8 @@ union sockaddr_inany;
void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
uint32_t events);
bool tcp_splice_conn_from_sock(const struct ctx *c,
- union tcp_listen_epoll_ref ref,
- struct tcp_splice_conn *conn, int s,
- const union sockaddr_inany *sa);
+ union tcp_listen_epoll_ref ref, union flow *flow,
+ int s, const union sockaddr_inany *sa);
void tcp_splice_init(struct ctx *c);
#endif /* TCP_SPLICE_H */