aboutgitcodebugslistschat
diff options
context:
space:
mode:
-rw-r--r--flow.c146
-rw-r--r--flow.h81
-rw-r--r--flow_table.h9
-rw-r--r--icmp.c4
-rw-r--r--tcp.c8
-rw-r--r--tcp_splice.c4
6 files changed, 183 insertions, 69 deletions
diff --git a/flow.c b/flow.c
index e257f89..da71fe1 100644
--- a/flow.c
+++ b/flow.c
@@ -18,6 +18,15 @@
#include "flow.h"
#include "flow_table.h"
+const char *flow_state_str[] = {
+ [FLOW_STATE_FREE] = "FREE",
+ [FLOW_STATE_NEW] = "NEW",
+ [FLOW_STATE_TYPED] = "TYPED",
+ [FLOW_STATE_ACTIVE] = "ACTIVE",
+};
+static_assert(ARRAY_SIZE(flow_state_str) == FLOW_NUM_STATES,
+ "flow_state_str[] doesn't match enum flow_state");
+
const char *flow_type_str[] = {
[FLOW_TYPE_NONE] = "<none>",
[FLOW_TCP] = "TCP connection",
@@ -40,46 +49,6 @@ static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
/* Global Flow Table */
/**
- * DOC: Theory of Operation - flow entry life cycle
- *
- * An individual flow table entry moves through these logical states, usually in
- * this order.
- *
- * FREE - Part of the general pool of free flow table entries
- * Operations:
- * - flow_alloc() finds an entry and moves it to ALLOC state
- *
- * ALLOC - A tentatively allocated entry
- * Operations:
- * - flow_alloc_cancel() returns the entry to FREE state
- * - FLOW_START() set the entry's type and moves to START state
- * Caveats:
- * - It's not safe to write fields in the flow entry
- * - It's not safe to allocate further entries with flow_alloc()
- * - It's not safe to return to the main epoll loop (use FLOW_START()
- * to move to START state before doing so)
- * - It's not safe to use flow_*() logging functions
- *
- * START - An entry being prepared by flow type specific code
- * Operations:
- * - Flow type specific fields may be accessed
- * - flow_*() logging functions
- * - flow_alloc_cancel() returns the entry to FREE state
- * Caveats:
- * - Returning to the main epoll loop or allocating another entry
- * with flow_alloc() implicitly moves the entry to ACTIVE state.
- *
- * ACTIVE - An active flow entry managed by flow type specific code
- * Operations:
- * - Flow type specific fields may be accessed
- * - flow_*() logging functions
- * - Flow may be expired by returning 'true' from flow type specific
- * deferred or timer handler. This will return it to FREE state.
- * Caveats:
- * - It's not safe to call flow_alloc_cancel()
- */
-
-/**
* DOC: Theory of Operation - allocating and freeing flow entries
*
* Flows are entries in flowtab[]. We need to routinely scan the whole table to
@@ -132,6 +101,7 @@ static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
unsigned flow_first_free;
union flow flowtab[FLOW_MAX];
+static const union flow *flow_new_entry; /* = NULL */
/* Last time the flow timers ran */
static struct timespec flow_timer_run;
@@ -144,6 +114,7 @@ static struct timespec flow_timer_run;
*/
void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
{
+ const char *type_or_state;
char msg[BUFSIZ];
va_list args;
@@ -151,40 +122,65 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
(void)vsnprintf(msg, sizeof(msg), fmt, args);
va_end(args);
- logmsg(pri, "Flow %u (%s): %s", flow_idx(f), FLOW_TYPE(f), msg);
+ /* Show type if it's set, otherwise the state */
+ if (f->state < FLOW_STATE_TYPED)
+ type_or_state = FLOW_STATE(f);
+ else
+ type_or_state = FLOW_TYPE(f);
+
+ logmsg(pri, "Flow %u (%s): %s", flow_idx(f), type_or_state, msg);
+}
+
+/**
+ * flow_set_state() - Change flow's state
+ * @f: Flow changing state
+ * @state: New state
+ */
+static void flow_set_state(struct flow_common *f, enum flow_state state)
+{
+ uint8_t oldstate = f->state;
+
+ ASSERT(state < FLOW_NUM_STATES);
+ ASSERT(oldstate < FLOW_NUM_STATES);
+
+ f->state = state;
+ flow_log_(f, LOG_DEBUG, "%s -> %s", flow_state_str[oldstate],
+ FLOW_STATE(f));
}
/**
- * flow_start() - Set flow type for new flow and log
- * @flow: Flow to set type for
+ * flow_set_type() - Set type and move to TYPED
+ * @flow: Flow to change state
* @type: Type for new flow
* @iniside: Which side initiated the new flow
*
* Return: @flow
- *
- * Should be called before setting any flow type specific fields in the flow
- * table entry.
*/
-union flow *flow_start(union flow *flow, enum flow_type type,
- unsigned iniside)
+union flow *flow_set_type(union flow *flow, enum flow_type type,
+ unsigned iniside)
{
+ struct flow_common *f = &flow->f;
+
+ ASSERT(type != FLOW_TYPE_NONE);
+ ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_NEW);
+ ASSERT(f->type == FLOW_TYPE_NONE);
+
(void)iniside;
- flow->f.type = type;
- flow_dbg(flow, "START %s", flow_type_str[flow->f.type]);
+ f->type = type;
+ flow_set_state(f, FLOW_STATE_TYPED);
return flow;
}
/**
- * flow_end() - Clear flow type for finished flow and log
- * @flow: Flow to clear
+ * flow_activate() - Move flow to ACTIVE
+ * @f: Flow to change state
*/
-static void flow_end(union flow *flow)
+void flow_activate(struct flow_common *f)
{
- if (flow->f.type == FLOW_TYPE_NONE)
- return; /* Nothing to do */
+ ASSERT(&flow_new_entry->f == f && f->state == FLOW_STATE_TYPED);
- flow_dbg(flow, "END %s", flow_type_str[flow->f.type]);
- flow->f.type = FLOW_TYPE_NONE;
+ flow_set_state(f, FLOW_STATE_ACTIVE);
+ flow_new_entry = NULL;
}
/**
@@ -196,9 +192,12 @@ union flow *flow_alloc(void)
{
union flow *flow = &flowtab[flow_first_free];
+ ASSERT(!flow_new_entry);
+
if (flow_first_free >= FLOW_MAX)
return NULL;
+ ASSERT(flow->f.state == FLOW_STATE_FREE);
ASSERT(flow->f.type == FLOW_TYPE_NONE);
ASSERT(flow->free.n >= 1);
ASSERT(flow_first_free + flow->free.n <= FLOW_MAX);
@@ -221,7 +220,10 @@ union flow *flow_alloc(void)
flow_first_free = flow->free.next;
}
+ flow_new_entry = flow;
memset(flow, 0, sizeof(*flow));
+ flow_set_state(&flow->f, FLOW_STATE_NEW);
+
return flow;
}
@@ -233,15 +235,21 @@ union flow *flow_alloc(void)
*/
void flow_alloc_cancel(union flow *flow)
{
+ ASSERT(flow_new_entry == flow);
+ ASSERT(flow->f.state == FLOW_STATE_NEW ||
+ flow->f.state == FLOW_STATE_TYPED);
ASSERT(flow_first_free > FLOW_IDX(flow));
- flow_end(flow);
+ flow_set_state(&flow->f, FLOW_STATE_FREE);
+ memset(flow, 0, sizeof(*flow));
+
/* Put it back in a length 1 free cluster, don't attempt to fully
* reverse flow_alloc()s steps. This will get folded together the next
* time flow_defer_handler runs anyway() */
flow->free.n = 1;
flow->free.next = flow_first_free;
flow_first_free = FLOW_IDX(flow);
+ flow_new_entry = NULL;
}
/**
@@ -261,11 +269,14 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
flow_timer_run = *now;
}
+ ASSERT(!flow_new_entry); /* Incomplete flow at end of cycle */
+
for (idx = 0; idx < FLOW_MAX; idx++) {
union flow *flow = &flowtab[idx];
bool closed = false;
- if (flow->f.type == FLOW_TYPE_NONE) {
+ switch (flow->f.state) {
+ case FLOW_STATE_FREE: {
unsigned skip = flow->free.n;
/* First entry of a free cluster must have n >= 1 */
@@ -287,6 +298,20 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
continue;
}
+ case FLOW_STATE_NEW:
+ case FLOW_STATE_TYPED:
+ /* Incomplete flow at end of cycle */
+ ASSERT(false);
+ break;
+
+ case FLOW_STATE_ACTIVE:
+ /* Nothing to do */
+ break;
+
+ default:
+ ASSERT(false);
+ }
+
switch (flow->f.type) {
case FLOW_TYPE_NONE:
ASSERT(false);
@@ -310,7 +335,8 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
}
if (closed) {
- flow_end(flow);
+ flow_set_state(&flow->f, FLOW_STATE_FREE);
+ memset(flow, 0, sizeof(*flow));
if (free_head) {
/* Add slot to current free cluster */
diff --git a/flow.h b/flow.h
index c943c44..e61d35e 100644
--- a/flow.h
+++ b/flow.h
@@ -10,6 +10,68 @@
#define FLOW_TIMER_INTERVAL 1000 /* ms */
/**
+ * enum flow_state - States of a flow table entry
+ *
+ * An individual flow table entry moves through these states, usually in this
+ * order.
+ * General rules:
+ * - Code outside flow.c should never write common fields of union flow.
+ * - The state field may always be read.
+ *
+ * FREE - Part of the general pool of free flow table entries
+ * Operations:
+ * - flow_alloc() finds an entry and moves it to NEW
+ *
+ * NEW - Freshly allocated, uninitialised entry
+ * Operations:
+ * - flow_alloc_cancel() returns the entry to FREE
+ * - FLOW_SET_TYPE() sets the entry's type and moves to TYPED
+ * Caveats:
+ * - No fields other than state may be accessed
+ * - At most one entry may be NEW or TYPED at a time, so it's unsafe
+ * to use flow_alloc() again until this entry moves to ACTIVE or
+ * FREE
+ * - You may not return to the main epoll loop while any flow is NEW
+ *
+ * TYPED - Generic info initialised, type specific initialisation underway
+ * Operations:
+ * - All common fields may be read
+ * - Type specific fields may be read and written
+ * - flow_alloc_cancel() returns the entry to FREE
+ * - FLOW_ACTIVATE() moves the entry to ACTIVE
+ * Caveats:
+ * - At most one entry may be NEW or TYPED at a time, so it's unsafe
+ * to use flow_alloc() again until this entry moves to ACTIVE or
+ * FREE
+ * - You may not return to the main epoll loop while any flow is
+ * TYPED
+ *
+ * ACTIVE - An active, fully-initialised flow entry
+ * Operations:
+ * - All common fields may be read
+ * - Type specific fields may be read and written
+ * - Flow returns to FREE when it expires, signalled by returning
+ * 'true' from flow type specific deferred or timer handler
+ * Caveats:
+ * - flow_alloc_cancel() may not be called on it
+ */
+enum flow_state {
+ FLOW_STATE_FREE,
+ FLOW_STATE_NEW,
+ FLOW_STATE_TYPED,
+ FLOW_STATE_ACTIVE,
+
+ FLOW_NUM_STATES,
+};
+#define FLOW_STATE_BITS 8
+static_assert(FLOW_NUM_STATES <= (1 << FLOW_STATE_BITS),
+ "Too many flow states for FLOW_STATE_BITS");
+
+extern const char *flow_state_str[];
+#define FLOW_STATE(f) \
+ ((f)->state < FLOW_NUM_STATES ? flow_state_str[(f)->state] : "?")
+
+/**
* enum flow_type - Different types of packet flows we track
*/
enum flow_type {
@@ -26,6 +88,9 @@ enum flow_type {
FLOW_NUM_TYPES,
};
+#define FLOW_TYPE_BITS 8
+static_assert(FLOW_NUM_TYPES <= (1 << FLOW_TYPE_BITS),
+ "Too many flow types for FLOW_TYPE_BITS");
extern const char *flow_type_str[];
#define FLOW_TYPE(f) \
@@ -37,10 +102,21 @@ extern const uint8_t flow_proto[];
/**
* struct flow_common - Common fields for packet flows
+ * @state: State of the flow table entry
* @type: Type of packet flow
*/
struct flow_common {
+#ifdef __GNUC__
+ enum flow_state state:FLOW_STATE_BITS;
+ enum flow_type type:FLOW_TYPE_BITS;
+#else
+ uint8_t state;
+ static_assert(sizeof(uint8_t) * 8 >= FLOW_STATE_BITS,
+ "Not enough bits for state field");
uint8_t type;
+ static_assert(sizeof(uint8_t) * 8 >= FLOW_TYPE_BITS,
+ "Not enough bits for type field");
+#endif
};
#define FLOW_INDEX_BITS 17 /* 128k - 1 */
@@ -49,11 +125,6 @@ struct flow_common {
#define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */
#define FLOW_FILE_PRESSURE 30 /* % of c->nofile */
-union flow *flow_start(union flow *flow, enum flow_type type,
- unsigned iniside);
-#define FLOW_START(flow_, t_, var_, i_) \
- (&flow_start((flow_), (t_), (i_))->var_)
-
/**
* struct flow_sidx - ID for one side of a specific flow
* @side: Side referenced (0 or 1)
diff --git a/flow_table.h b/flow_table.h
index b7e5529..17b9ea7 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -107,4 +107,13 @@ static inline flow_sidx_t flow_sidx(const struct flow_common *f,
union flow *flow_alloc(void);
void flow_alloc_cancel(union flow *flow);
+union flow *flow_set_type(union flow *flow, enum flow_type type,
+ unsigned iniside);
+#define FLOW_SET_TYPE(flow_, t_, var_, i_) \
+ (&flow_set_type((flow_), (t_), (i_))->var_)
+
+void flow_activate(struct flow_common *f);
+#define FLOW_ACTIVATE(flow_) \
+ (flow_activate(&(flow_)->f))
+
#endif /* FLOW_TABLE_H */
diff --git a/icmp.c b/icmp.c
index eb559c1..fc72955 100644
--- a/icmp.c
+++ b/icmp.c
@@ -167,7 +167,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
if (!flow)
return NULL;
- pingf = FLOW_START(flow, flowtype, ping, TAPSIDE);
+ pingf = FLOW_SET_TYPE(flow, flowtype, ping, TAPSIDE);
pingf->seq = -1;
pingf->id = id;
@@ -198,6 +198,8 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
*id_sock = pingf;
+ FLOW_ACTIVATE(pingf);
+
return pingf;
cancel:
diff --git a/tcp.c b/tcp.c
index d0ce202..398bda7 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2004,7 +2004,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
goto cancel;
}
- conn = FLOW_START(flow, FLOW_TCP, tcp, TAPSIDE);
+ conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp, TAPSIDE);
conn->sock = s;
conn->timer = -1;
conn_event(c, conn, TAP_SYN_RCVD);
@@ -2075,6 +2075,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
}
tcp_epoll_ctl(c, conn);
+ FLOW_ACTIVATE(conn);
return;
cancel:
@@ -2715,7 +2716,8 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
const union sockaddr_inany *sa,
const struct timespec *now)
{
- struct tcp_tap_conn *conn = FLOW_START(flow, FLOW_TCP, tcp, SOCKSIDE);
+ struct tcp_tap_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp,
+ SOCKSIDE);
conn->sock = s;
conn->timer = -1;
@@ -2738,6 +2740,8 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
conn_flag(c, conn, ACK_FROM_TAP_DUE);
tcp_get_sndbuf(conn);
+
+ FLOW_ACTIVATE(conn);
}
/**
diff --git a/tcp_splice.c b/tcp_splice.c
index fb00bc2..852a93b 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -471,7 +471,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
return false;
}
- conn = FLOW_START(flow, FLOW_TCP_SPLICE, tcp_splice, 0);
+ conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice, 0);
conn->flags = af == AF_INET ? 0 : SPLICE_V6;
conn->s[0] = s0;
@@ -485,6 +485,8 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
if (tcp_splice_connect(c, conn, af, pif1, dstport))
conn_flag(c, conn, CLOSING);
+ FLOW_ACTIVATE(conn);
+
return true;
}