about | git | code | bugs | lists | chat
path: root/tcp_splice.c
diff options
context:
space:
mode:
Diffstat (limited to 'tcp_splice.c')
-rw-r--r-- tcp_splice.c 164
1 files changed, 93 insertions, 71 deletions
diff --git a/tcp_splice.c b/tcp_splice.c
index 717766a..d60981c 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -114,63 +114,44 @@ static struct tcp_splice_conn *conn_at_sidx(flow_sidx_t sidx)
* @events: Connection event flags
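- * @ev: Events to fill in, 0 is accepted socket, 1 is connecting socket
+ * @sidei: Side index, 0 is accepted socket, 1 is connecting socket
+ *
+ * Return: epoll events mask for side @sidei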
*/
-static void tcp_splice_conn_epoll_events(uint16_t events,
- struct epoll_event ev[])
+static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
{
- unsigned sidei;
-
- flow_foreach_sidei(sidei)
- ev[sidei].events = 0;
+ uint32_t e = 0; /* epoll event mask computed for side @sidei only */
if (events & SPLICE_ESTABLISHED) {
- flow_foreach_sidei(sidei) {
- if (!(events & FIN_SENT(!sidei)))
- ev[sidei].events = EPOLLIN | EPOLLRDHUP;
- }
- } else if (events & SPLICE_CONNECT) {
- ev[1].events = EPOLLOUT;
+ if (!(events & FIN_SENT(!sidei))) /* opposite side not flagged FIN_SENT yet: keep polling input */
+ e = EPOLLIN | EPOLLRDHUP;
+ } else if (sidei == 1 && events & SPLICE_CONNECT) { /* parses as sidei == 1 && (events & SPLICE_CONNECT) */
+ e = EPOLLOUT; /* as before the change, only side 1 polls for EPOLLOUT while connecting */
}
- flow_foreach_sidei(sidei) {
- if (events & OUT_WAIT(sidei)) {
- ev[sidei].events |= EPOLLOUT;
- ev[!sidei].events &= ~EPOLLIN;
- }
- }
+ if (events & OUT_WAIT(sidei))
+ e |= EPOLLOUT; /* this side has OUT_WAIT set: poll it for writability */
+ if (events & OUT_WAIT(!sidei))
+ e &= ~EPOLLIN; /* opposite side has OUT_WAIT set: stop polling this side for input */
+
+ return e;
}
/**
- * tcp_splice_epoll_ctl() - Add/modify/delete epoll state from connection events
- * @c: Execution context
+ * tcp_splice_epoll_ctl() - Update epoll event masks of both sides from connection events
* @conn: Connection pointer
*
- * Return: 0 on success, negative error code on failure (not on deletion)
+ * Computes the event mask of each side from @conn->events and applies it to
+ * both sockets with EPOLL_CTL_MOD. NOTE(review): presumably both sockets must
+ * already have been registered with EPOLL_CTL_ADD -- confirm against callers.
+ *
+ * Return: 0 on success, negative error code (-errno) on failure
*/
-static int tcp_splice_epoll_ctl(const struct ctx *c,
- struct tcp_splice_conn *conn)
+static int tcp_splice_epoll_ctl(struct tcp_splice_conn *conn)
{
- int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f)
- : c->epollfd;
- int m = flow_in_epoll(&conn->f) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
- const union epoll_ref ref[SIDES] = {
- { .type = EPOLL_TYPE_TCP_SPLICE, .fd = conn->s[0],
- .flowside = FLOW_SIDX(conn, 0) },
- { .type = EPOLL_TYPE_TCP_SPLICE, .fd = conn->s[1],
- .flowside = FLOW_SIDX(conn, 1) }
- };
- struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 },
- { .data.u64 = ref[1].u64 } };
-
- tcp_splice_conn_epoll_events(conn->events, ev);
+ uint32_t events[2];
+ events[0] = tcp_splice_conn_epoll_events(conn->events, 0);
+ events[1] = tcp_splice_conn_epoll_events(conn->events, 1);
- if (epoll_ctl(epollfd, m, conn->s[0], &ev[0]) ||
- epoll_ctl(epollfd, m, conn->s[1], &ev[1])) {
+ if (flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events[0], conn->s[0], 0) ||
+ flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events[1], conn->s[1], 1)) {
int ret = -errno;
flow_perror(conn, "ERROR on epoll_ctl()");
return ret;
}
- flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
return 0;
}
@@ -210,7 +191,7 @@ static void conn_flag_do(struct tcp_splice_conn *conn,
}
}
-#define conn_flag(c, conn, flag) \
+#define conn_flag(conn, flag) \
do { \
flow_trace(conn, "flag at %s:%i", __func__, __LINE__); \
conn_flag_do(conn, flag); \
@@ -218,12 +199,10 @@ static void conn_flag_do(struct tcp_splice_conn *conn,
/**
* conn_event_do() - Set and log connection events, update epoll state
- * @c: Execution context
* @conn: Connection pointer
* @event: Connection event
*/
-static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
- unsigned long event)
+static void conn_event_do(struct tcp_splice_conn *conn, unsigned long event)
{
if (event & (event - 1)) {
int flag_index = fls(~event);
@@ -245,16 +224,47 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
flow_dbg(conn, "%s", tcp_splice_event_str[flag_index]);
}
- if (tcp_splice_epoll_ctl(c, conn))
- conn_flag(c, conn, CLOSING);
+ if (tcp_splice_epoll_ctl(conn))
+ conn_flag(conn, CLOSING);
}
-#define conn_event(c, conn, event) \
+#define conn_event(conn, event) \
do { \
flow_trace(conn, "event at %s:%i",__func__, __LINE__); \
- conn_event_do(c, conn, event); \
+ conn_event_do(conn, event); \
} while (0)
+/**
+ * tcp_splice_rst() - Close spliced connection forcing RST on each side
+ * @conn: Connection pointer
+ *
+ * Sets SO_LINGER with a zero timeout on the sockets of both sides, then flags
+ * the connection as CLOSING. No socket is closed here. If CLOSING is already
+ * set, this returns without touching the sockets. A failing setsockopt() is
+ * only logged at debug level: the connection is flagged CLOSING regardless.
+ */
+static void tcp_splice_rst(struct tcp_splice_conn *conn)
+{
+ const struct linger linger0 = {
+ .l_onoff = 1,
+ .l_linger = 0,
+ };
+ unsigned sidei;
+
+ if (conn->flags & CLOSING)
+ return; /* Nothing to do */
+
+ /* Force RST on sockets to inform the peer
+ *
+ * We do this by setting SO_LINGER with 0 timeout, which means that
+ * close() will send an RST (unless the connection is already closed in
+ * both directions).
+ */
+ flow_foreach_sidei(sidei) {
+ if (setsockopt(conn->s[sidei], SOL_SOCKET,
+ SO_LINGER, &linger0, sizeof(linger0)) < 0) {
+ flow_dbg_perror(conn,
+"SO_LINGER failed, may not send RST to peer");
+ }
+ }
+
+ conn_flag(conn, CLOSING);
+}
/**
* tcp_splice_flow_defer() - Deferred per-flow handling (clean up closed)
@@ -320,7 +330,7 @@ static int tcp_splice_connect_finish(const struct ctx *c,
if (pipe2(conn->pipe[sidei], O_NONBLOCK | O_CLOEXEC)) {
flow_perror(conn, "cannot create %d->%d pipe",
sidei, !sidei);
- conn_flag(c, conn, CLOSING);
+ tcp_splice_rst(conn);
return -EIO;
}
@@ -334,7 +344,7 @@ static int tcp_splice_connect_finish(const struct ctx *c,
}
if (!(conn->events & SPLICE_ESTABLISHED))
- conn_event(c, conn, SPLICE_ESTABLISHED);
+ conn_event(conn, SPLICE_ESTABLISHED);
return 0;
}
@@ -381,16 +391,24 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
pif_sockaddr(c, &sa, tgtpif, &tgt->eaddr, tgt->eport);
+ flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
+ if (flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->s[0], 0) ||
+ flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->s[1], 1)) {
+ int ret = -errno;
+ flow_perror(conn, "Cannot register to epollfd");
+ return ret;
+ }
+
+ conn_event(conn, SPLICE_CONNECT);
+
if (connect(conn->s[1], &sa.sa, socklen_inany(&sa))) {
if (errno != EINPROGRESS) {
flow_trace(conn, "Couldn't connect socket for splice: %s",
strerror_(errno));
return -errno;
}
-
- conn_event(c, conn, SPLICE_CONNECT);
} else {
- conn_event(c, conn, SPLICE_ESTABLISHED);
+ conn_event(conn, SPLICE_ESTABLISHED);
return tcp_splice_connect_finish(c, conn);
}
@@ -450,7 +468,7 @@ void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0)
flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0);
if (tcp_splice_connect(c, conn))
- conn_flag(c, conn, CLOSING);
+ tcp_splice_rst(conn);
FLOW_ACTIVATE(conn);
}
@@ -487,26 +505,26 @@ void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
flow_trace(conn, "Error event on socket: %s",
strerror_(err));
- goto close;
+ goto reset;
}
if (conn->events == SPLICE_CONNECT) {
if (!(events & EPOLLOUT))
- goto close;
+ goto reset;
if (tcp_splice_connect_finish(c, conn))
- goto close;
+ goto reset;
}
if (events & EPOLLOUT) {
fromsidei = !evsidei;
- conn_event(c, conn, ~OUT_WAIT(evsidei));
+ conn_event(conn, ~OUT_WAIT(evsidei));
} else {
fromsidei = evsidei;
}
if (events & EPOLLRDHUP)
/* For side 0 this is fake, but implied */
- conn_event(c, conn, FIN_RCVD(evsidei));
+ conn_event(conn, FIN_RCVD(evsidei));
swap:
eof = 0;
@@ -528,7 +546,7 @@ retry:
while (readlen < 0 && errno == EINTR);
if (readlen < 0 && errno != EAGAIN)
- goto close;
+ goto reset;
flow_trace(conn, "%zi from read-side call", readlen);
@@ -541,7 +559,7 @@ retry:
more = SPLICE_F_MORE;
if (conn->flags & lowat_set_flag)
- conn_flag(c, conn, lowat_act_flag);
+ conn_flag(conn, lowat_act_flag);
}
do
@@ -552,7 +570,7 @@ retry:
while (written < 0 && errno == EINTR);
if (written < 0 && errno != EAGAIN)
- goto close;
+ goto reset;
flow_trace(conn, "%zi from write-side call (passed %zi)",
written, c->tcp.pipe_size);
@@ -573,8 +591,8 @@ retry:
"Setting SO_RCVLOWAT %i: %s",
lowat, strerror_(errno));
} else {
- conn_flag(c, conn, lowat_set_flag);
- conn_flag(c, conn, lowat_act_flag);
+ conn_flag(conn, lowat_set_flag);
+ conn_flag(conn, lowat_act_flag);
}
}
@@ -588,7 +606,7 @@ retry:
if (conn->read[fromsidei] == conn->written[fromsidei])
break;
- conn_event(c, conn, OUT_WAIT(!fromsidei));
+ conn_event(conn, OUT_WAIT(!fromsidei));
break;
}
@@ -609,14 +627,18 @@ retry:
flow_foreach_sidei(sidei) {
if ((conn->events & FIN_RCVD(sidei)) &&
!(conn->events & FIN_SENT(!sidei))) {
- shutdown(conn->s[!sidei], SHUT_WR);
- conn_event(c, conn, FIN_SENT(!sidei));
+ if (shutdown(conn->s[!sidei], SHUT_WR) < 0)
+ goto reset;
+ conn_event(conn, FIN_SENT(!sidei));
}
}
}
- if (CONN_HAS(conn, FIN_SENT(0) | FIN_SENT(1)))
- goto close;
+ if (CONN_HAS(conn, FIN_SENT(0) | FIN_SENT(1))) {
+ /* Clean close, no reset */
+ conn_flag(conn, CLOSING);
+ return;
+ }
if ((events & (EPOLLIN | EPOLLOUT)) == (EPOLLIN | EPOLLOUT)) {
events = EPOLLIN;
@@ -626,12 +648,12 @@ retry:
}
if (events & EPOLLHUP)
- goto close;
+ goto reset;
return;
-close:
- conn_flag(c, conn, CLOSING);
+reset:
+ tcp_splice_rst(conn);
}
/**
@@ -767,10 +789,10 @@ void tcp_splice_timer(struct tcp_splice_conn *conn)
flow_trace(conn, "can't set SO_RCVLOWAT on %d",
conn->s[sidei]);
}
- conn_flag(c, conn, ~RCVLOWAT_SET(sidei));
+ conn_flag(conn, ~RCVLOWAT_SET(sidei));
}
}
flow_foreach_sidei(sidei)
- conn_flag(c, conn, ~RCVLOWAT_ACT(sidei));
+ conn_flag(conn, ~RCVLOWAT_ACT(sidei));
}