diff options
Diffstat (limited to 'tcp_splice.c')
| -rw-r--r-- | tcp_splice.c | 164 |
1 files changed, 93 insertions, 71 deletions
diff --git a/tcp_splice.c b/tcp_splice.c index 717766a..d60981c 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -114,63 +114,44 @@ static struct tcp_splice_conn *conn_at_sidx(flow_sidx_t sidx) * @events: Connection event flags * @ev: Events to fill in, 0 is accepted socket, 1 is connecting socket */ -static void tcp_splice_conn_epoll_events(uint16_t events, - struct epoll_event ev[]) +static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei) { - unsigned sidei; - - flow_foreach_sidei(sidei) - ev[sidei].events = 0; + uint32_t e = 0; if (events & SPLICE_ESTABLISHED) { - flow_foreach_sidei(sidei) { - if (!(events & FIN_SENT(!sidei))) - ev[sidei].events = EPOLLIN | EPOLLRDHUP; - } - } else if (events & SPLICE_CONNECT) { - ev[1].events = EPOLLOUT; + if (!(events & FIN_SENT(!sidei))) + e = EPOLLIN | EPOLLRDHUP; + } else if (sidei == 1 && events & SPLICE_CONNECT) { + e = EPOLLOUT; } - flow_foreach_sidei(sidei) { - if (events & OUT_WAIT(sidei)) { - ev[sidei].events |= EPOLLOUT; - ev[!sidei].events &= ~EPOLLIN; - } - } + if (events & OUT_WAIT(sidei)) + e |= EPOLLOUT; + if (events & OUT_WAIT(!sidei)) + e &= ~EPOLLIN; + + return e; } /** * tcp_splice_epoll_ctl() - Add/modify/delete epoll state from connection events - * @c: Execution context * @conn: Connection pointer * * Return: 0 on success, negative error code on failure (not on deletion) */ -static int tcp_splice_epoll_ctl(const struct ctx *c, - struct tcp_splice_conn *conn) +static int tcp_splice_epoll_ctl(struct tcp_splice_conn *conn) { - int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f) - : c->epollfd; - int m = flow_in_epoll(&conn->f) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; - const union epoll_ref ref[SIDES] = { - { .type = EPOLL_TYPE_TCP_SPLICE, .fd = conn->s[0], - .flowside = FLOW_SIDX(conn, 0) }, - { .type = EPOLL_TYPE_TCP_SPLICE, .fd = conn->s[1], - .flowside = FLOW_SIDX(conn, 1) } - }; - struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 }, - { .data.u64 = ref[1].u64 } }; - - tcp_splice_conn_epoll_events(conn->events, ev); + uint32_t events[2]; + events[0] = tcp_splice_conn_epoll_events(conn->events, 0); + events[1] = tcp_splice_conn_epoll_events(conn->events, 1); - if (epoll_ctl(epollfd, m, conn->s[0], &ev[0]) || - epoll_ctl(epollfd, m, conn->s[1], &ev[1])) { + if (flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events[0], conn->s[0], 0) || + flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events[1], conn->s[1], 1)) { int ret = -errno; flow_perror(conn, "ERROR on epoll_ctl()"); return ret; } - flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT); return 0; } @@ -210,7 +191,7 @@ static void conn_flag_do(struct tcp_splice_conn *conn, } } -#define conn_flag(c, conn, flag) \ +#define conn_flag(conn, flag) \ do { \ flow_trace(conn, "flag at %s:%i", __func__, __LINE__); \ conn_flag_do(conn, flag); \ @@ -218,12 +199,10 @@ static void conn_flag_do(struct tcp_splice_conn *conn, /** * conn_event_do() - Set and log connection events, update epoll state - * @c: Execution context * @conn: Connection pointer * @event: Connection event */ -static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn, - unsigned long event) +static void conn_event_do(struct tcp_splice_conn *conn, unsigned long event) { if (event & (event - 1)) { int flag_index = fls(~event); @@ -245,16 +224,47 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn, flow_dbg(conn, "%s", tcp_splice_event_str[flag_index]); } - if (tcp_splice_epoll_ctl(c, conn)) - conn_flag(c, conn, CLOSING); + if (tcp_splice_epoll_ctl(conn)) + conn_flag(conn, CLOSING); } -#define conn_event(c, conn, event) \ +#define conn_event(conn, event) \ do { \ flow_trace(conn, "event at %s:%i",__func__, __LINE__); \ - conn_event_do(c, conn, event); \ + conn_event_do(conn, event); \ } while (0) +/** + * tcp_splice_rst() - Close spliced connection forcing RST on each side + * @conn: Connection pointer + */ +static void tcp_splice_rst(struct tcp_splice_conn *conn) +{ + const struct linger linger0 = { + .l_onoff = 1, + .l_linger = 0, + }; + unsigned sidei; + + if (conn->flags & CLOSING) + return; /* Nothing to do */ + + /* Force RST on sockets to inform the peer + * + * We do this by setting SO_LINGER with 0 timeout, which means that + * close() will send an RST (unless the connection is already closed in + * both directions). + */ + flow_foreach_sidei(sidei) { + if (setsockopt(conn->s[sidei], SOL_SOCKET, + SO_LINGER, &linger0, sizeof(linger0)) < 0) { + flow_dbg_perror(conn, +"SO_LINGER failed, may not send RST to peer"); + } + } + + conn_flag(conn, CLOSING); +} /** * tcp_splice_flow_defer() - Deferred per-flow handling (clean up closed) @@ -320,7 +330,7 @@ static int tcp_splice_connect_finish(const struct ctx *c, if (pipe2(conn->pipe[sidei], O_NONBLOCK | O_CLOEXEC)) { flow_perror(conn, "cannot create %d->%d pipe", sidei, !sidei); - conn_flag(c, conn, CLOSING); + tcp_splice_rst(conn); return -EIO; } @@ -334,7 +344,7 @@ static int tcp_splice_connect_finish(const struct ctx *c, } if (!(conn->events & SPLICE_ESTABLISHED)) - conn_event(c, conn, SPLICE_ESTABLISHED); + conn_event(conn, SPLICE_ESTABLISHED); return 0; } @@ -381,16 +391,24 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn) pif_sockaddr(c, &sa, tgtpif, &tgt->eaddr, tgt->eport); + flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT); + if (flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->s[0], 0) || + flow_epoll_set(&conn->f, EPOLL_CTL_ADD, 0, conn->s[1], 1)) { + int ret = -errno; + flow_perror(conn, "Cannot register to epollfd"); + return ret; + } + + conn_event(conn, SPLICE_CONNECT); + if (connect(conn->s[1], &sa.sa, socklen_inany(&sa))) { if (errno != EINPROGRESS) { flow_trace(conn, "Couldn't connect socket for splice: %s", strerror_(errno)); return -errno; } - - conn_event(c, conn, SPLICE_CONNECT); } else { - conn_event(c, conn, SPLICE_ESTABLISHED); + conn_event(conn, SPLICE_ESTABLISHED); return tcp_splice_connect_finish(c, conn); } @@ -450,7 +468,7 @@ void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0) flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0); if (tcp_splice_connect(c, conn)) - conn_flag(c, conn, CLOSING); + tcp_splice_rst(conn); FLOW_ACTIVATE(conn); } @@ -487,26 +505,26 @@ void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref, flow_trace(conn, "Error event on socket: %s", strerror_(err)); - goto close; + goto reset; } if (conn->events == SPLICE_CONNECT) { if (!(events & EPOLLOUT)) - goto close; + goto reset; if (tcp_splice_connect_finish(c, conn)) - goto close; + goto reset; } if (events & EPOLLOUT) { fromsidei = !evsidei; - conn_event(c, conn, ~OUT_WAIT(evsidei)); + conn_event(conn, ~OUT_WAIT(evsidei)); } else { fromsidei = evsidei; } if (events & EPOLLRDHUP) /* For side 0 this is fake, but implied */ - conn_event(c, conn, FIN_RCVD(evsidei)); + conn_event(conn, FIN_RCVD(evsidei)); swap: eof = 0; @@ -528,7 +546,7 @@ retry: while (readlen < 0 && errno == EINTR); if (readlen < 0 && errno != EAGAIN) - goto close; + goto reset; flow_trace(conn, "%zi from read-side call", readlen); @@ -541,7 +559,7 @@ retry: more = SPLICE_F_MORE; if (conn->flags & lowat_set_flag) - conn_flag(c, conn, lowat_act_flag); + conn_flag(conn, lowat_act_flag); } do @@ -552,7 +570,7 @@ retry: while (written < 0 && errno == EINTR); if (written < 0 && errno != EAGAIN) - goto close; + goto reset; flow_trace(conn, "%zi from write-side call (passed %zi)", written, c->tcp.pipe_size); @@ -573,8 +591,8 @@ retry: "Setting SO_RCVLOWAT %i: %s", lowat, strerror_(errno)); } else { - conn_flag(c, conn, lowat_set_flag); - conn_flag(c, conn, lowat_act_flag); + conn_flag(conn, lowat_set_flag); + conn_flag(conn, lowat_act_flag); } } @@ -588,7 +606,7 @@ retry: if (conn->read[fromsidei] == conn->written[fromsidei]) break; - conn_event(c, conn, OUT_WAIT(!fromsidei)); + conn_event(conn, OUT_WAIT(!fromsidei)); break; } @@ -609,14 +627,18 @@ retry: flow_foreach_sidei(sidei) { if ((conn->events & FIN_RCVD(sidei)) && !(conn->events & FIN_SENT(!sidei))) { - shutdown(conn->s[!sidei], SHUT_WR); - conn_event(c, conn, FIN_SENT(!sidei)); + if (shutdown(conn->s[!sidei], SHUT_WR) < 0) + goto reset; + conn_event(conn, FIN_SENT(!sidei)); } } } - if (CONN_HAS(conn, FIN_SENT(0) | FIN_SENT(1))) - goto close; + if (CONN_HAS(conn, FIN_SENT(0) | FIN_SENT(1))) { + /* Clean close, no reset */ + conn_flag(conn, CLOSING); + return; + } if ((events & (EPOLLIN | EPOLLOUT)) == (EPOLLIN | EPOLLOUT)) { events = EPOLLIN; @@ -626,12 +648,12 @@ retry: } if (events & EPOLLHUP) - goto close; + goto reset; return; -close: - conn_flag(c, conn, CLOSING); +reset: + tcp_splice_rst(conn); } /** @@ -767,10 +789,10 @@ void tcp_splice_timer(struct tcp_splice_conn *conn) flow_trace(conn, "can't set SO_RCVLOWAT on %d", conn->s[sidei]); } - conn_flag(c, conn, ~RCVLOWAT_SET(sidei)); + conn_flag(conn, ~RCVLOWAT_SET(sidei)); } } flow_foreach_sidei(sidei) - conn_flag(c, conn, ~RCVLOWAT_ACT(sidei)); + conn_flag(conn, ~RCVLOWAT_ACT(sidei)); } |
