From 19d254bbbb3ab319d15891ff7287f5182980c105 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 21 May 2021 11:14:51 +0200 Subject: passt: Add support for multiple instances in different network namespaces ...sharing the same filesystem. Instead of a fixed path for the UNIX domain socket, passt now uses a path with a counter, probing for existing instances, and picking the first free one. The demo script is updated accordingly -- it can now be started several times to create multiple namespaces with an instance of passt each, with addressing reflecting separate subnets, and NDP proxying between them. Signed-off-by: Stefano Brivio --- doc/demo.sh | 84 +++++++++++++++++++++++++++++++++++++++++-------------------- passt.c | 46 +++++++++++++++++++++++---------- passt.h | 5 +++- pcap.c | 14 +++++++++-- pcap.h | 2 +- util.h | 7 ++++++ 6 files changed, 114 insertions(+), 44 deletions(-) diff --git a/doc/demo.sh b/doc/demo.sh index 5aed7f0..c7d2eac 100755 --- a/doc/demo.sh +++ b/doc/demo.sh @@ -30,7 +30,7 @@ ipv6_mangle() { if [ ${__c} -lt 7 ]; then printf "${__16b}:" else - printf "abcd\n" && break + printf "%04x\n" $((0xabc0 + ${2})) && break fi __c=$((__c + 1)) done @@ -40,43 +40,66 @@ ipv6_mangle() { ndp_setup() { sysctl -w net.ipv6.conf.all.proxy_ndp=1 ip -6 neigh add proxy "${1}" dev "$(ipv6_dev)" + + for i in `seq 1 63`; do + __neigh="$(ipv6_mangle ${1} ${i})" + if [ "${__neigh}" != "${1}" ]; then + ip -6 neigh add proxy "${__neigh}" dev "${2}" + fi + done } -ip netns del passt 2>/dev/null || : -ip link del veth_passt 2>/dev/null || : -ip netns add passt -ip link add veth_passt up netns passt type veth peer name veth_passt -ip link set dev veth_passt up -ip -n passt link set dev lo up +ns_idx=0 +for i in `seq 1 63`; do + ns="passt_${i}" + ns_idx=${i} + + busy=0 + for p in $(pidof passt); do + [ "$(ip netns identify ${p})" = "${ns}" ] && busy=1 && break + done + [ ${busy} -eq 0 ] && break +done + +[ ${busy} -ne 0 ] && echo "Couldn't create namespace" && exit 1 + +ip netns del "${ns}" 2>/dev/null || : +ip netns add "${ns}" +ip link del "veth_${ns}" 2>/dev/null || : +ip link add "veth_${ns}" up netns "${ns}" type veth peer name "veth_${ns}" +ip link set dev "veth_${ns}" up +ip -n "${ns}" link set dev lo up +ipv4_main="192.0.2.$(((ns_idx - 1) * 4 + 1))" +ipv4_ns="192.0.2.$(((ns_idx - 1) * 4 + 2))" -ip -n passt addr add 192.0.2.2/24 dev veth_passt -ip addr add 192.0.2.1/24 dev veth_passt -ip -n passt route add default via 192.0.2.1 +ip -n "${ns}" addr add "${ipv4_ns}/30" dev "veth_${ns}" +ip addr add "${ipv4_main}/30" dev "veth_${ns}" +ip -n "${ns}" route add default via "${ipv4_main}" sysctl -w net.ipv4.ip_forward=1 -nft delete table passt_nat 2>/dev/null || : -nft add table passt_nat -nft 'add chain passt_nat postrouting { type nat hook postrouting priority -100 ; }' -nft add rule passt_nat postrouting ip saddr 192.0.2.2 masquerade +nft delete table "${ns}_nat" 2>/dev/null || : +nft add table "${ns}_nat" +nft add chain "${ns}_nat" postrouting '{ type nat hook postrouting priority -100 ; }' +nft add rule "${ns}_nat" postrouting ip saddr "${ipv4_ns}" masquerade ipv6_addr="$(ipv6_devaddr "$(ipv6_dev)")" -ipv6_passt="$(ipv6_mangle "${ipv6_addr}")" -ndp_setup "${ipv6_passt}" -ip -n passt addr add "${ipv6_passt}/$(ipv6_mask "${ipv6_addr}")" dev veth_passt -ip addr add "${ipv6_addr}" dev veth_passt -ip route add "${ipv6_passt}" dev veth_passt -passt_ll="$(ipv6_ll_addr "veth_passt")" -main_ll="$(get_token "link/ether" $(ip -o li sh veth_passt))" -ip neigh add "${passt_ll%%/*}" dev veth_passt lladdr "${main_ll}" -ip -n passt route add default via "${passt_ll%%/*}" dev veth_passt +ipv6_passt="$(ipv6_mangle "${ipv6_addr}" ${ns_idx})" +ndp_setup "${ipv6_passt}" "veth_${ns}" +ip -n "${ns}" addr add "${ipv6_passt}/$(ipv6_mask "${ipv6_addr}")" dev "veth_${ns}" +ip addr add "${ipv6_addr}" dev "veth_${ns}" +ip route add "${ipv6_passt}" dev "veth_${ns}" +passt_ll="$(ipv6_ll_addr "veth_${ns}")" +main_ll="$(get_token "link/ether" $(ip -o li sh "veth_${ns}"))" +ip neigh add "${passt_ll%%/*}" dev "veth_${ns}" lladdr "${main_ll}" +ip -n "${ns}" route add default via "${passt_ll%%/*}" dev "veth_${ns}" sysctl -w net.ipv6.conf.all.forwarding=1 -ethtool -K veth_passt tx off -ip netns exec passt ethtool -K veth_passt tx off -ip netns exec passt sysctl -w net.ipv4.ping_group_range="0 2147483647" +ethtool -K "veth_${ns}" tx off +ip netns exec "${ns}" ethtool -K "veth_${ns}" tx off +ip netns exec "${ns}" sysctl -w net.ipv4.ping_group_range="0 2147483647" sysctl -w net.core.rmem_max=16777216 @@ -84,5 +107,12 @@ sysctl -w net.core.wmem_max=16777216 sysctl -w net.core.rmem_default=16777216 sysctl -w net.core.wmem_default=16777216 +echo +echo "Namespace ${ns} set up, addresses:" +echo " ${ipv4_ns}" +echo " ${ipv6_passt}" +echo +echo "Starting passt..." +echo -ip netns exec passt ./passt +ip netns exec "${ns}" ./passt diff --git a/passt.c b/passt.c index 466cae8..a057d46 100644 --- a/passt.c +++ b/passt.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -30,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -82,31 +82,50 @@ static char *ip_proto_str[IPPROTO_SCTP + 1] = { /** * sock_unix() - Create and bind AF_UNIX socket, add to epoll list + * @index: Index used in socket path, filled on success * * Return: newly created socket, doesn't return on error */ -static int sock_unix(void) +static int sock_unix(int *index) { - int fd = socket(AF_UNIX, SOCK_STREAM, 0); + int fd = socket(AF_UNIX, SOCK_STREAM, 0), ex; struct sockaddr_un addr = { .sun_family = AF_UNIX, - .sun_path = UNIX_SOCK_PATH, }; + int i, ret; if (fd < 0) { perror("UNIX socket"); exit(EXIT_FAILURE); } - unlink(UNIX_SOCK_PATH); - if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { + for (i = 1; i < UNIX_SOCK_MAX; i++) { + snprintf(addr.sun_path, UNIX_PATH_MAX, UNIX_SOCK_PATH, i); + + ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0); + ret = connect(ex, (const struct sockaddr *)&addr, sizeof(addr)); + if (!ret || errno != ECONNREFUSED) { + close(ex); + continue; + } + close(ex); + + unlink(addr.sun_path); + if (!bind(fd, (const struct sockaddr *)&addr, sizeof(addr))) + break; + } + + if (i == UNIX_SOCK_MAX) { perror("UNIX socket bind"); exit(EXIT_FAILURE); } - chmod(UNIX_SOCK_PATH, + info("UNIX domain socket bound at %s\n", addr.sun_path); + chmod(addr.sun_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); + *index = i; + return fd; } @@ -743,11 +762,11 @@ void usage(const char *name) int main(int argc, char **argv) { struct epoll_event events[EPOLL_EVENTS]; + int nfds, i, fd_unix, sock_index; char buf6[INET6_ADDRSTRLEN]; char buf4[INET_ADDRSTRLEN]; struct epoll_event ev = { 0 }; struct ctx c = { 0 }; - int nfds, i, fd_unix; struct rlimit limit; struct timespec now; @@ -785,7 +804,7 @@ int main(int argc, char **argv) get_addrs(&c); get_dns(&c); - fd_unix = sock_unix(); + fd_unix = sock_unix(&sock_index); if (icmp_sock_init(&c) || udp_sock_init(&c) || tcp_sock_init(&c)) exit(EXIT_FAILURE); @@ -795,7 +814,7 @@ int main(int argc, char **argv) memset(&c.mac_guest, 0xff, sizeof(c.mac_guest)); - pcap_init(); + pcap_init(sock_index); if (c.v4) { info("ARP:"); @@ -841,14 +860,14 @@ int main(int argc, char **argv) } listen: - listen(fd_unix, 1); + listen(fd_unix, 0); info("You can now start qrap:"); info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio"); info("or directly qemu, patched with:"); info(" qemu/0001-net-Allow-also-UNIX-domain-sockets-to-be-used-as-net.patch"); info("as follows:"); - info(" kvm ... -net socket,connect=" - UNIX_SOCK_PATH " -net nic,model=virtio"); + info(" kvm ... -net socket,connect=" UNIX_SOCK_PATH + " -net nic,model=virtio", sock_index); #ifndef DEBUG if (daemon(0, 0)) { @@ -858,6 +877,7 @@ listen: #endif c.fd_unix = accept(fd_unix, NULL, NULL); + ev.events = EPOLLIN | EPOLLRDHUP | EPOLLERR | EPOLLHUP; ev.data.fd = c.fd_unix; epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev); diff --git a/passt.h b/passt.h index bfbdc06..28840fc 100644 --- a/passt.h +++ b/passt.h @@ -1,4 +1,5 @@ -#define UNIX_SOCK_PATH "/tmp/passt.socket" +#define UNIX_SOCK_MAX 100 +#define UNIX_SOCK_PATH "/tmp/passt_%i.socket" /** * struct tap_msg - Generic message descriptor for arrays of messages @@ -26,6 +27,8 @@ struct fqdn { char n[NS_MAXDNAME]; }; +#include + /** * struct ctx - Execution context * @epollfd: file descriptor for epoll instance diff --git a/pcap.c b/pcap.c index 2767b53..8dd647a 100644 --- a/pcap.c +++ b/pcap.c @@ -20,6 +20,10 @@ #include #include #include +#include + +#include "passt.h" +#include "util.h" #ifdef DEBUG @@ -77,9 +81,9 @@ void pcap(char *pkt, size_t len) write(pcap_fd, pkt, len); } -void pcap_init(void) +void pcap_init(int sock_index) { - char name[] = PCAP_PREFIX PCAP_ISO8601_STR ".pcap"; + char name[] = PCAP_PREFIX PCAP_ISO8601_STR STR(UNIX_SOCK_MAX) ".pcap"; struct timeval tv; struct tm *tm; @@ -88,6 +92,10 @@ void pcap_init(void) strftime(name + strlen(PCAP_PREFIX), sizeof(PCAP_ISO8601_STR) - 1, PCAP_ISO8601_FORMAT, tm); + snprintf(name + strlen(PCAP_PREFIX) + strlen(PCAP_ISO8601_STR), + sizeof(name) - strlen(PCAP_PREFIX) - strlen(PCAP_ISO8601_STR), + "_%i.pcap", sock_index); + pcap_fd = open(name, O_WRONLY | O_CREAT | O_APPEND | O_DSYNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (pcap_fd == -1) { @@ -95,6 +103,8 @@ void pcap_init(void) return; } + info("Saving packet capture at %s", name); + write(pcap_fd, &pcap_hdr, sizeof(pcap_hdr)); } diff --git a/pcap.h b/pcap.h index 00bafee..abca097 100644 --- a/pcap.h +++ b/pcap.h @@ -1,2 +1,2 @@ void pcap(char *pkt, size_t len); -void pcap_init(void); +void pcap_init(int sock_index); diff --git a/util.h b/util.h index 7d0704c..3e24c9a 100644 --- a/util.h +++ b/util.h @@ -29,6 +29,9 @@ void debug(const char *format, ...); #define MAX(x, y) (((x) > (y)) ? (x) : (y)) #endif +#define STRINGIFY(x) #x +#define STR(x) STRINGIFY(x) + #define ARRAY_SIZE(a) ((int)(sizeof(a) / sizeof((a)[0]))) #define IN_INTERVAL(a, b, x) ((x) >= (a) && (x) <= (b)) @@ -37,6 +40,10 @@ void debug(const char *format, ...); #define PORT_IS_EPHEMERAL(port) ((port) >= (1 << 15) + (1 << 14)) /* RFC 6335 */ +#include +#include +#include + uint16_t csum_fold(uint32_t sum); uint16_t csum_ip4(void *buf, size_t len); void csum_tcp4(struct iphdr *iph); -- cgit v1.2.3