aboutgitcodebugslistschat
path: root/tcp_splice.c
diff options
context:
space:
mode:
Diffstat (limited to 'tcp_splice.c')
-rw-r--r--tcp_splice.c107
1 files changed, 55 insertions, 52 deletions
diff --git a/tcp_splice.c b/tcp_splice.c
index f048a82..4405224 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -44,7 +44,6 @@
#include <net/ethernet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
-#include <sys/epoll.h>
#include <sys/types.h>
#include <sys/socket.h>
@@ -56,6 +55,7 @@
#include "siphash.h"
#include "inany.h"
#include "flow.h"
+#include "epoll_ctl.h"
#include "flow_table.h"
@@ -95,7 +95,7 @@ static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af);
* conn_at_sidx() - Get spliced TCP connection specific flow at given sidx
* @sidx: Flow and side to retrieve
*
- * Return: Spliced TCP connection at @sidx, or NULL of @sidx is invalid.
+ * Return: spliced TCP connection at @sidx, or NULL of @sidx is invalid.
* Asserts if the flow at @sidx is not FLOW_TCP_SPLICE.
*/
static struct tcp_splice_conn *conn_at_sidx(flow_sidx_t sidx)
@@ -131,8 +131,12 @@ static void tcp_splice_conn_epoll_events(uint16_t events,
ev[1].events = EPOLLOUT;
}
- flow_foreach_sidei(sidei)
- ev[sidei].events |= (events & OUT_WAIT(sidei)) ? EPOLLOUT : 0;
+ flow_foreach_sidei(sidei) {
+ if (events & OUT_WAIT(sidei)) {
+ ev[sidei].events |= EPOLLOUT;
+ ev[!sidei].events &= ~EPOLLIN;
+ }
+ }
}
/**
@@ -145,7 +149,9 @@ static void tcp_splice_conn_epoll_events(uint16_t events,
static int tcp_splice_epoll_ctl(const struct ctx *c,
struct tcp_splice_conn *conn)
{
- int m = conn->in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
+ int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f)
+ : c->epollfd;
+ int m = flow_in_epoll(&conn->f) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
const union epoll_ref ref[SIDES] = {
{ .type = EPOLL_TYPE_TCP_SPLICE, .fd = conn->s[0],
.flowside = FLOW_SIDX(conn, 0) },
@@ -157,25 +163,24 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
tcp_splice_conn_epoll_events(conn->events, ev);
- if (epoll_ctl(c->epollfd, m, conn->s[0], &ev[0]) ||
- epoll_ctl(c->epollfd, m, conn->s[1], &ev[1])) {
+
+ if (epoll_ctl(epollfd, m, conn->s[0], &ev[0]) ||
+ epoll_ctl(epollfd, m, conn->s[1], &ev[1])) {
int ret = -errno;
- flow_err(conn, "ERROR on epoll_ctl(): %s", strerror_(errno));
+ flow_perror(conn, "ERROR on epoll_ctl()");
return ret;
}
-
- conn->in_epoll = true;
+ flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
return 0;
}
/**
* conn_flag_do() - Set/unset given flag, log, update epoll on CLOSING flag
- * @c: Execution context
* @conn: Connection pointer
* @flag: Flag to set, or ~flag to unset
*/
-static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
+static void conn_flag_do(struct tcp_splice_conn *conn,
unsigned long flag)
{
if (flag & (flag - 1)) {
@@ -200,15 +205,15 @@ static void conn_flag_do(const struct ctx *c, struct tcp_splice_conn *conn,
}
if (flag == CLOSING) {
- epoll_del(c, conn->s[0]);
- epoll_del(c, conn->s[1]);
+ epoll_del(flow_epollfd(&conn->f), conn->s[0]);
+ epoll_del(flow_epollfd(&conn->f), conn->s[1]);
}
}
#define conn_flag(c, conn, flag) \
do { \
flow_trace(conn, "flag at %s:%i", __func__, __LINE__); \
- conn_flag_do(c, conn, flag); \
+ conn_flag_do(conn, flag); \
} while (0)
/**
@@ -313,8 +318,8 @@ static int tcp_splice_connect_finish(const struct ctx *c,
if (conn->pipe[sidei][0] < 0) {
if (pipe2(conn->pipe[sidei], O_NONBLOCK | O_CLOEXEC)) {
- flow_err(conn, "cannot create %d->%d pipe: %s",
- sidei, !sidei, strerror_(errno));
+ flow_perror(conn, "cannot create %d->%d pipe",
+ sidei, !sidei);
conn_flag(c, conn, CLOSING);
return -EIO;
}
@@ -347,11 +352,10 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
sa_family_t af = inany_v4(&tgt->eaddr) ? AF_INET : AF_INET6;
uint8_t tgtpif = conn->f.pif[TGTSIDE];
union sockaddr_inany sa;
- socklen_t sl;
int one = 1;
if (tgtpif == PIF_HOST)
- conn->s[1] = tcp_conn_sock(c, af);
+ conn->s[1] = tcp_conn_sock(af);
else if (tgtpif == PIF_SPLICE)
conn->s[1] = tcp_conn_sock_ns(c, af);
else
@@ -375,16 +379,16 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
conn->s[1]);
}
- pif_sockaddr(c, &sa, &sl, tgtpif, &tgt->eaddr, tgt->eport);
+ pif_sockaddr(c, &sa, tgtpif, &tgt->eaddr, tgt->eport);
- if (connect(conn->s[1], &sa.sa, sl)) {
+ conn_event(c, conn, SPLICE_CONNECT);
+
+ if (connect(conn->s[1], &sa.sa, socklen_inany(&sa))) {
if (errno != EINPROGRESS) {
flow_trace(conn, "Couldn't connect socket for splice: %s",
strerror_(errno));
return -errno;
}
-
- conn_event(c, conn, SPLICE_CONNECT);
} else {
conn_event(c, conn, SPLICE_ESTABLISHED);
return tcp_splice_connect_finish(c, conn);
@@ -398,7 +402,7 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
* @c: Execution context
* @af: Address family (AF_INET or AF_INET6)
*
- * Return: Socket fd in the namespace on success, -errno on failure
+ * Return: socket fd in the namespace on success, -errno on failure
*/
static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af)
{
@@ -478,8 +482,7 @@ void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
rc = getsockopt(ref.fd, SOL_SOCKET, SO_ERROR, &err, &sl);
if (rc)
- flow_err(conn, "Error retrieving SO_ERROR: %s",
- strerror_(errno));
+ flow_perror(conn, "Error retrieving SO_ERROR");
else
flow_trace(conn, "Error event on socket: %s",
strerror_(err));
@@ -517,20 +520,21 @@ swap:
int more = 0;
retry:
- readlen = splice(conn->s[fromsidei], NULL,
- conn->pipe[fromsidei][1], NULL,
- c->tcp.pipe_size,
- SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
+ do
+ readlen = splice(conn->s[fromsidei], NULL,
+ conn->pipe[fromsidei][1], NULL,
+ c->tcp.pipe_size,
+ SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
+ while (readlen < 0 && errno == EINTR);
+
+ if (readlen < 0 && errno != EAGAIN)
+ goto close;
+
flow_trace(conn, "%zi from read-side call", readlen);
- if (readlen < 0) {
- if (errno == EINTR)
- goto retry;
- if (errno != EAGAIN)
- goto close;
- } else if (!readlen) {
+ if (!readlen) {
eof = 1;
- } else {
+ } else if (readlen > 0) {
never_read = 0;
if (readlen >= (long)c->tcp.pipe_size * 90 / 100)
@@ -540,10 +544,16 @@ retry:
conn_flag(c, conn, lowat_act_flag);
}
-eintr:
- written = splice(conn->pipe[fromsidei][0], NULL,
- conn->s[!fromsidei], NULL, c->tcp.pipe_size,
- SPLICE_F_MOVE | more | SPLICE_F_NONBLOCK);
+ do
+ written = splice(conn->pipe[fromsidei][0], NULL,
+ conn->s[!fromsidei], NULL,
+ c->tcp.pipe_size,
+ SPLICE_F_MOVE | more | SPLICE_F_NONBLOCK);
+ while (written < 0 && errno == EINTR);
+
+ if (written < 0 && errno != EAGAIN)
+ goto close;
+
flow_trace(conn, "%zi from write-side call (passed %zi)",
written, c->tcp.pipe_size);
@@ -552,7 +562,7 @@ eintr:
if (readlen >= (long)c->tcp.pipe_size * 10 / 100)
continue;
- if (conn->flags & lowat_set_flag &&
+ if (!(conn->flags & lowat_set_flag) &&
readlen > (long)c->tcp.pipe_size / 10) {
int lowat = c->tcp.pipe_size / 4;
@@ -575,12 +585,6 @@ eintr:
conn->written[fromsidei] += written > 0 ? written : 0;
if (written < 0) {
- if (errno == EINTR)
- goto eintr;
-
- if (errno != EAGAIN)
- goto close;
-
if (conn->read[fromsidei] == conn->written[fromsidei])
break;
@@ -703,13 +707,13 @@ static int tcp_sock_refill_ns(void *arg)
ns_enter(c);
if (c->ifi4) {
- int rc = tcp_sock_refill_pool(c, ns_sock_pool4, AF_INET);
+ int rc = tcp_sock_refill_pool(ns_sock_pool4, AF_INET);
if (rc < 0)
warn("TCP: Error refilling IPv4 ns socket pool: %s",
strerror_(-rc));
}
if (c->ifi6) {
- int rc = tcp_sock_refill_pool(c, ns_sock_pool6, AF_INET6);
+ int rc = tcp_sock_refill_pool(ns_sock_pool6, AF_INET6);
if (rc < 0)
warn("TCP: Error refilling IPv6 ns socket pool: %s",
strerror_(-rc));
@@ -747,10 +751,9 @@ void tcp_splice_init(struct ctx *c)
/**
* tcp_splice_timer() - Timer for spliced connections
- * @c: Execution context
* @conn: Connection to handle
*/
-void tcp_splice_timer(const struct ctx *c, struct tcp_splice_conn *conn)
+void tcp_splice_timer(struct tcp_splice_conn *conn)
{
unsigned sidei;