aboutgitcodebugslistschat
diff options
context:
space:
mode:
-rw-r--r--tcp.c204
1 files changed, 169 insertions, 35 deletions
diff --git a/tcp.c b/tcp.c
index e9d3be7..445cae8 100644
--- a/tcp.c
+++ b/tcp.c
@@ -321,6 +321,9 @@
/* Approximately maximum number of open descriptors per process */
#define MAX_CONNS (256 * 1024)
+#define TCP_HASH_TABLE_LOAD 70 /* % */
+#define TCP_HASH_TABLE_SIZE (MAX_CONNS * 100 / TCP_HASH_TABLE_LOAD)
+
#define MAX_WS 10
#define MAX_WINDOW (1 << (16 + (MAX_WS)))
#define MSS_DEFAULT 536
@@ -373,8 +376,13 @@ static char *tcp_state_str[FIN_WAIT_1_SOCK_FIN + 1] = {
#define OPT_SACK 5
#define OPT_TS 8
+struct tcp_conn;
+
/**
* struct tcp_conn - Descriptor for a TCP connection
+ * @next: Pointer to next item in hash chain, if any
+ * @sock: Socket descriptor number
+ * @hash_bucket: Bucket index in socket lookup hash table
* @a.a6: IPv6 remote address, can be IPv4-mapped
* @a.a4.zero: Zero prefix for IPv4-mapped, see RFC 6890, Table 20
* @a.a4.one: Ones prefix for IPv4-mapped
@@ -398,6 +406,10 @@ static char *tcp_state_str[FIN_WAIT_1_SOCK_FIN + 1] = {
* @mss_guest: Maximum segment size advertised by guest
*/
struct tcp_conn {
+ struct tcp_conn *next;
+ int sock;
+ int hash_bucket;
+
union {
struct in6_addr a6;
struct {
@@ -429,9 +441,19 @@ struct tcp_conn {
int mss_guest;
};
+/* Socket receive buffer */
static char sock_buf[MAX_WINDOW];
+
+/* Bitmap, activity monitoring needed for connection, indexed by socket */
static uint8_t tcp_act[MAX_CONNS / 8] = { 0 };
+
+/* TCP connections, indexed by socket */
static struct tcp_conn tc[MAX_CONNS];
+
+/* Hash table for socket lookup given remote address, local port, remote port */
+static int tc_hash[TCP_HASH_TABLE_SIZE];
+
+/* 128-bit secret for hash functions, for initial sequence numbers and table */
static uint64_t hash_secret[2];
static int tcp_send_to_tap(struct ctx *c, int s, int flags, char *in, int len);
@@ -525,7 +547,143 @@ static int tcp_opt_get(struct tcphdr *th, size_t len, uint8_t __type,
}
/**
- * tcp_close_and_epoll_del() - Close socket and remove from epoll descriptor
+ * tcp_sock_hash_match() - Check if a connection entry matches address and ports
+ * @conn: Connection entry to match against
+ * @af: Address family, AF_INET or AF_INET6
+ * @addr: Remote address, pointer to sin_addr or sin6_addr
+ * @tap_port: tap-facing port
+ * @sock_port: Socket-facing port
+ *
+ * Return: 1 on match, 0 otherwise
+ */
+static int tcp_sock_hash_match(struct tcp_conn *conn, int af, void *addr,
+ in_port_t tap_port, in_port_t sock_port)
+{
+ if (af == AF_INET && IN6_IS_ADDR_V4MAPPED(&conn->a.a6) &&
+ !memcmp(&conn->a.a4.a, addr, sizeof(conn->a.a4.a)) &&
+ conn->tap_port == tap_port && conn->sock_port == sock_port)
+ return 1;
+
+ if (af == AF_INET6 &&
+ !memcmp(&conn->a.a6, addr, sizeof(conn->a.a6)) &&
+ conn->tap_port == tap_port && conn->sock_port == sock_port)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * tcp_sock_hash() - Calculate hash value for connection given address and ports
+ * @af: Address family, AF_INET or AF_INET6
+ * @addr: Remote address, pointer to sin_addr or sin6_addr
+ * @tap_port: tap-facing port
+ * @sock_port: Socket-facing port
+ *
+ * Return: hash value, already modulo size of the hash table
+ */
+static unsigned int tcp_sock_hash(int af, void *addr,
+ in_port_t tap_port, in_port_t sock_port)
+{
+ uint64_t b;
+
+ if (af == AF_INET) {
+ struct {
+ struct in_addr addr;
+ in_port_t tap_port;
+ in_port_t sock_port;
+ } __attribute__((__packed__)) in = {
+ .addr = *(struct in_addr *)addr,
+ .tap_port = tap_port,
+ .sock_port = sock_port,
+ };
+
+ b = siphash_8b((uint8_t *)&in, hash_secret);
+ } else if (af == AF_INET6) {
+ struct {
+ struct in6_addr addr;
+ in_port_t tap_port;
+ in_port_t sock_port;
+ } __attribute__((__packed__)) in = {
+ .addr = *(struct in6_addr *)addr,
+ .tap_port = tap_port,
+ .sock_port = sock_port,
+ };
+
+ b = siphash_20b((uint8_t *)&in, hash_secret);
+ }
+
+ return (unsigned int)(b % TCP_HASH_TABLE_SIZE);
+}
+
+/**
+ * tcp_sock_hash_insert() - Insert socket into hash table, chain link if needed
+ * @s: File descriptor number for socket
+ * @af: Address family, AF_INET or AF_INET6
+ * @addr: Remote address, pointer to sin_addr or sin6_addr
+ * @tap_port: tap-facing port
+ * @sock_port: Socket-facing port
+ */
+static void tcp_sock_hash_insert(int s, int af, void *addr,
+ in_port_t tap_port, in_port_t sock_port)
+{
+ int b;
+
+ b = tcp_sock_hash(af, addr, tap_port, sock_port);
+ tc[s].next = tc_hash[b] ? &tc[tc_hash[b]] : NULL;
+ tc_hash[b] = tc[s].sock = s;
+ tc[s].hash_bucket = b;
+}
+
+/**
+ * tcp_sock_hash_remove() - Drop socket from hash table, chain unlink if needed
+ * @b: Bucket index
+ * @s: File descriptor number for socket
+ */
+static void tcp_sock_hash_remove(int b, int s)
+{
+ struct tcp_conn *conn, *prev = NULL;
+
+ for (conn = &tc[tc_hash[b]]; conn; prev = conn, conn = conn->next) {
+ if (conn->sock == s) {
+ conn->sock = 0;
+ if (prev)
+ prev->next = conn->next;
+ else
+ tc_hash[b] = conn->next ? conn->next->sock : 0;
+ return;
+ }
+ }
+}
+
+/**
+ * tcp_sock_hash_lookup() - Look up socket given remote address and ports
+ * @af: Address family, AF_INET or AF_INET6
+ * @addr: Remote address, pointer to sin_addr or sin6_addr
+ * @tap_port: tap-facing port
+ * @sock_port: Socket-facing port
+ *
+ * Return: file descriptor number for socket, if found, -ENOENT otherwise
+ */
+static int tcp_sock_hash_lookup(int af, void *addr,
+ in_port_t tap_port, in_port_t sock_port)
+{
+ struct tcp_conn *conn;
+ int b;
+
+ b = tcp_sock_hash(af, addr, tap_port, sock_port);
+ if (!tc_hash[b])
+ return -ENOENT;
+
+ for (conn = &tc[tc_hash[b]]; conn; conn = conn->next) {
+ if (tcp_sock_hash_match(conn, af, addr, tap_port, sock_port))
+ return conn->sock;
+ }
+
+ return -ENOENT;
+}
+
+/**
+ * tcp_close_and_epoll_del() - Close, remove socket from hash table and epoll fd
* @c: Execution context
* @s: File descriptor number for socket
*/
@@ -534,6 +692,7 @@ static void tcp_close_and_epoll_del(struct ctx *c, int s)
epoll_ctl(c->epollfd, EPOLL_CTL_DEL, s, NULL);
tcp_set_state(s, CLOSED);
close(s);
+ tcp_sock_hash_remove(tc[s].hash_bucket, tc[s].sock);
tcp_act_clear(s);
}
@@ -804,6 +963,8 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr,
tc[s].seq_to_tap = tcp_seq_init(c, af, addr, th->dest, th->source);
tc[s].seq_ack_from_tap = tc[s].seq_to_tap + 1;
+ tcp_sock_hash_insert(s, af, addr, th->source, th->dest);
+
if (connect(s, sa, sl)) {
if (errno != EINPROGRESS) {
tcp_rst(c, s);
@@ -823,39 +984,6 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr,
}
/**
- * tcp_sock_lookup() - Look up socket given remote address and pair of ports
- * @af: Address family, AF_INET or AF_INET6
- * @tap_port: tap-facing port
- * @sock_port: Socket-facing port
- *
- * Return: file descriptor number for socket, if found, -ENOENT otherwise
- */
-static int tcp_sock_lookup(int af, void *addr,
- in_port_t tap_port, in_port_t sock_port)
-{
- int i;
-
- /* TODO: hash table and lookup. This is just a dummy implementation. */
- for (i = 0; i < MAX_CONNS; i++) {
- if (af == AF_INET && IN6_IS_ADDR_V4MAPPED(&tc[i].a.a6) &&
- !memcmp(&tc[i].a.a4.a, addr, sizeof(tc[i].a.a4.a)) &&
- tc[i].tap_port == tap_port &&
- tc[i].sock_port == sock_port &&
- tc[i].s)
- return i;
-
- if (af == AF_INET6 &&
- !memcmp(&tc[i].a.a6, addr, sizeof(tc[i].a.a6)) &&
- tc[i].tap_port == tap_port &&
- tc[i].sock_port == sock_port &&
- tc[i].s)
- return i;
- }
-
- return -ENOENT;
-}
-
-/**
* tcp_conn_from_sock() - Handle new connection request from listening socket
* @c: Execution context
* @fd: File descriptor number for listening socket
@@ -887,6 +1015,9 @@ static void tcp_conn_from_sock(struct ctx *c, int fd)
tc[s].seq_to_tap = tcp_seq_init(c, AF_INET, &sa4->sin_addr,
tc[s].sock_port,
tc[s].tap_port);
+
+ tcp_sock_hash_insert(s, AF_INET, &sa4->sin_addr,
+ tc[s].tap_port, tc[s].sock_port);
} else if (sa_l.ss_family == AF_INET6) {
struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)&sa_r;
@@ -898,6 +1029,9 @@ static void tcp_conn_from_sock(struct ctx *c, int fd)
tc[s].seq_to_tap = tcp_seq_init(c, AF_INET6, &sa6->sin6_addr,
tc[s].sock_port,
tc[s].tap_port);
+
+ tcp_sock_hash_insert(s, AF_INET6, &sa6->sin6_addr,
+ tc[s].tap_port, tc[s].sock_port);
}
tc[s].seq_ack_from_tap = tc[s].seq_to_tap + 1;
@@ -1067,7 +1201,7 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len)
if (off < sizeof(*th) || off > len)
return;
- if ((s = tcp_sock_lookup(af, addr, th->source, th->dest)) < 0) {
+ if ((s = tcp_sock_hash_lookup(af, addr, th->source, th->dest)) < 0) {
if (th->syn)
tcp_conn_from_tap(c, af, addr, th, len);
return;