about | git | code | bugs | lists | chat
diff options
context:
space:
mode:
author: Stefano Brivio <sbrivio@redhat.com> 2021-05-21 11:14:51 +0200
committer: Stefano Brivio <sbrivio@redhat.com> 2021-05-21 11:14:51 +0200
commit: 19d254bbbb3ab319d15891ff7287f5182980c105 (patch)
tree: 160fbdecfe6b6e255e05236590325260be51cfd4
parent: 8ce188ecb0a0d19874f8c0e663d5d8adffa50d43 (diff)
downloadpasst-19d254bbbb3ab319d15891ff7287f5182980c105.tar
passt-19d254bbbb3ab319d15891ff7287f5182980c105.tar.gz
passt-19d254bbbb3ab319d15891ff7287f5182980c105.tar.bz2
passt-19d254bbbb3ab319d15891ff7287f5182980c105.tar.lz
passt-19d254bbbb3ab319d15891ff7287f5182980c105.tar.xz
passt-19d254bbbb3ab319d15891ff7287f5182980c105.tar.zst
passt-19d254bbbb3ab319d15891ff7287f5182980c105.zip
passt: Add support for multiple instances in different network namespaces
...sharing the same filesystem. Instead of a fixed path for the UNIX domain socket, passt now uses a path with a counter, probing for existing instances, and picking the first free one. The demo script is updated accordingly -- it can now be started several times to create multiple namespaces with an instance of passt each, with addressing reflecting separate subnets, and NDP proxying between them.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-rwxr-xr-x  doc/demo.sh  84
-rw-r--r--  passt.c      46
-rw-r--r--  passt.h       5
-rw-r--r--  pcap.c       14
-rw-r--r--  pcap.h        2
-rw-r--r--  util.h        7
6 files changed, 114 insertions, 44 deletions
diff --git a/doc/demo.sh b/doc/demo.sh
index 5aed7f0..c7d2eac 100755
--- a/doc/demo.sh
+++ b/doc/demo.sh
@@ -30,7 +30,7 @@ ipv6_mangle() {
if [ ${__c} -lt 7 ]; then
printf "${__16b}:"
else
- printf "abcd\n" && break
+ printf "%04x\n" $((0xabc0 + ${2})) && break
fi
__c=$((__c + 1))
done
@@ -40,43 +40,66 @@ ipv6_mangle() {
ndp_setup() {
sysctl -w net.ipv6.conf.all.proxy_ndp=1
ip -6 neigh add proxy "${1}" dev "$(ipv6_dev)"
+
+ for i in `seq 1 63`; do
+ __neigh="$(ipv6_mangle ${1} ${i})"
+ if [ "${__neigh}" != "${1}" ]; then
+ ip -6 neigh add proxy "${__neigh}" dev "${2}"
+ fi
+ done
}
-ip netns del passt 2>/dev/null || :
-ip link del veth_passt 2>/dev/null || :
-ip netns add passt
-ip link add veth_passt up netns passt type veth peer name veth_passt
-ip link set dev veth_passt up
-ip -n passt link set dev lo up
+ns_idx=0
+for i in `seq 1 63`; do
+ ns="passt_${i}"
+ ns_idx=${i}
+
+ busy=0
+ for p in $(pidof passt); do
+ [ "$(ip netns identify ${p})" = "${ns}" ] && busy=1 && break
+ done
+ [ ${busy} -eq 0 ] && break
+done
+
+[ ${busy} -ne 0 ] && echo "Couldn't create namespace" && exit 1
+
+ip netns del "${ns}" 2>/dev/null || :
+ip netns add "${ns}"
+ip link del "veth_${ns}" 2>/dev/null || :
+ip link add "veth_${ns}" up netns "${ns}" type veth peer name "veth_${ns}"
+ip link set dev "veth_${ns}" up
+ip -n "${ns}" link set dev lo up
+ipv4_main="192.0.2.$(((ns_idx - 1) * 4 + 1))"
+ipv4_ns="192.0.2.$(((ns_idx - 1) * 4 + 2))"
-ip -n passt addr add 192.0.2.2/24 dev veth_passt
-ip addr add 192.0.2.1/24 dev veth_passt
-ip -n passt route add default via 192.0.2.1
+ip -n "${ns}" addr add "${ipv4_ns}/30" dev "veth_${ns}"
+ip addr add "${ipv4_main}/30" dev "veth_${ns}"
+ip -n "${ns}" route add default via "${ipv4_main}"
sysctl -w net.ipv4.ip_forward=1
-nft delete table passt_nat 2>/dev/null || :
-nft add table passt_nat
-nft 'add chain passt_nat postrouting { type nat hook postrouting priority -100 ; }'
-nft add rule passt_nat postrouting ip saddr 192.0.2.2 masquerade
+nft delete table "${ns}_nat" 2>/dev/null || :
+nft add table "${ns}_nat"
+nft add chain "${ns}_nat" postrouting '{ type nat hook postrouting priority -100 ; }'
+nft add rule "${ns}_nat" postrouting ip saddr "${ipv4_ns}" masquerade
ipv6_addr="$(ipv6_devaddr "$(ipv6_dev)")"
-ipv6_passt="$(ipv6_mangle "${ipv6_addr}")"
-ndp_setup "${ipv6_passt}"
-ip -n passt addr add "${ipv6_passt}/$(ipv6_mask "${ipv6_addr}")" dev veth_passt
-ip addr add "${ipv6_addr}" dev veth_passt
-ip route add "${ipv6_passt}" dev veth_passt
-passt_ll="$(ipv6_ll_addr "veth_passt")"
-main_ll="$(get_token "link/ether" $(ip -o li sh veth_passt))"
-ip neigh add "${passt_ll%%/*}" dev veth_passt lladdr "${main_ll}"
-ip -n passt route add default via "${passt_ll%%/*}" dev veth_passt
+ipv6_passt="$(ipv6_mangle "${ipv6_addr}" ${ns_idx})"
+ndp_setup "${ipv6_passt}" "veth_${ns}"
+ip -n "${ns}" addr add "${ipv6_passt}/$(ipv6_mask "${ipv6_addr}")" dev "veth_${ns}"
+ip addr add "${ipv6_addr}" dev "veth_${ns}"
+ip route add "${ipv6_passt}" dev "veth_${ns}"
+passt_ll="$(ipv6_ll_addr "veth_${ns}")"
+main_ll="$(get_token "link/ether" $(ip -o li sh "veth_${ns}"))"
+ip neigh add "${passt_ll%%/*}" dev "veth_${ns}" lladdr "${main_ll}"
+ip -n "${ns}" route add default via "${passt_ll%%/*}" dev "veth_${ns}"
sysctl -w net.ipv6.conf.all.forwarding=1
-ethtool -K veth_passt tx off
-ip netns exec passt ethtool -K veth_passt tx off
-ip netns exec passt sysctl -w net.ipv4.ping_group_range="0 2147483647"
+ethtool -K "veth_${ns}" tx off
+ip netns exec "${ns}" ethtool -K "veth_${ns}" tx off
+ip netns exec "${ns}" sysctl -w net.ipv4.ping_group_range="0 2147483647"
sysctl -w net.core.rmem_max=16777216
@@ -84,5 +107,12 @@ sysctl -w net.core.wmem_max=16777216
sysctl -w net.core.rmem_default=16777216
sysctl -w net.core.wmem_default=16777216
+echo
+echo "Namespace ${ns} set up, addresses:"
+echo " ${ipv4_ns}"
+echo " ${ipv6_passt}"
+echo
+echo "Starting passt..."
+echo
-ip netns exec passt ./passt
+ip netns exec "${ns}" ./passt
diff --git a/passt.c b/passt.c
index 466cae8..a057d46 100644
--- a/passt.c
+++ b/passt.c
@@ -20,7 +20,6 @@
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/uio.h>
-#include <sys/un.h>
#include <ifaddrs.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
@@ -30,6 +29,7 @@
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmpv6.h>
+#include <linux/un.h>
#include <linux/if_link.h>
#include <net/ethernet.h>
#include <stdlib.h>
@@ -82,31 +82,50 @@ static char *ip_proto_str[IPPROTO_SCTP + 1] = {
/**
* sock_unix() - Create and bind AF_UNIX socket, add to epoll list
+ * @index: Index used in socket path, filled on success
*
* Return: newly created socket, doesn't return on error
*/
-static int sock_unix(void)
+static int sock_unix(int *index)
{
- int fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ int fd = socket(AF_UNIX, SOCK_STREAM, 0), ex;
struct sockaddr_un addr = {
.sun_family = AF_UNIX,
- .sun_path = UNIX_SOCK_PATH,
};
+ int i, ret;
if (fd < 0) {
perror("UNIX socket");
exit(EXIT_FAILURE);
}
- unlink(UNIX_SOCK_PATH);
- if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ for (i = 1; i < UNIX_SOCK_MAX; i++) {
+ snprintf(addr.sun_path, UNIX_PATH_MAX, UNIX_SOCK_PATH, i);
+
+ ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0);
+ ret = connect(ex, (const struct sockaddr *)&addr, sizeof(addr));
+ if (!ret || errno != ECONNREFUSED) {
+ close(ex);
+ continue;
+ }
+ close(ex);
+
+ unlink(addr.sun_path);
+ if (!bind(fd, (const struct sockaddr *)&addr, sizeof(addr)))
+ break;
+ }
+
+ if (i == UNIX_SOCK_MAX) {
perror("UNIX socket bind");
exit(EXIT_FAILURE);
}
- chmod(UNIX_SOCK_PATH,
+ info("UNIX domain socket bound at %s\n", addr.sun_path);
+ chmod(addr.sun_path,
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
+ *index = i;
+
return fd;
}
@@ -743,11 +762,11 @@ void usage(const char *name)
int main(int argc, char **argv)
{
struct epoll_event events[EPOLL_EVENTS];
+ int nfds, i, fd_unix, sock_index;
char buf6[INET6_ADDRSTRLEN];
char buf4[INET_ADDRSTRLEN];
struct epoll_event ev = { 0 };
struct ctx c = { 0 };
- int nfds, i, fd_unix;
struct rlimit limit;
struct timespec now;
@@ -785,7 +804,7 @@ int main(int argc, char **argv)
get_addrs(&c);
get_dns(&c);
- fd_unix = sock_unix();
+ fd_unix = sock_unix(&sock_index);
if (icmp_sock_init(&c) || udp_sock_init(&c) || tcp_sock_init(&c))
exit(EXIT_FAILURE);
@@ -795,7 +814,7 @@ int main(int argc, char **argv)
memset(&c.mac_guest, 0xff, sizeof(c.mac_guest));
- pcap_init();
+ pcap_init(sock_index);
if (c.v4) {
info("ARP:");
@@ -841,14 +860,14 @@ int main(int argc, char **argv)
}
listen:
- listen(fd_unix, 1);
+ listen(fd_unix, 0);
info("You can now start qrap:");
info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
info("or directly qemu, patched with:");
info(" qemu/0001-net-Allow-also-UNIX-domain-sockets-to-be-used-as-net.patch");
info("as follows:");
- info(" kvm ... -net socket,connect="
- UNIX_SOCK_PATH " -net nic,model=virtio");
+ info(" kvm ... -net socket,connect=" UNIX_SOCK_PATH
+ " -net nic,model=virtio", sock_index);
#ifndef DEBUG
if (daemon(0, 0)) {
@@ -858,6 +877,7 @@ listen:
#endif
c.fd_unix = accept(fd_unix, NULL, NULL);
+
ev.events = EPOLLIN | EPOLLRDHUP | EPOLLERR | EPOLLHUP;
ev.data.fd = c.fd_unix;
epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev);
diff --git a/passt.h b/passt.h
index bfbdc06..28840fc 100644
--- a/passt.h
+++ b/passt.h
@@ -1,4 +1,5 @@
-#define UNIX_SOCK_PATH "/tmp/passt.socket"
+#define UNIX_SOCK_MAX 100
+#define UNIX_SOCK_PATH "/tmp/passt_%i.socket"
/**
* struct tap_msg - Generic message descriptor for arrays of messages
@@ -26,6 +27,8 @@ struct fqdn {
char n[NS_MAXDNAME];
};
+#include <net/if.h>
+
/**
* struct ctx - Execution context
* @epollfd: file descriptor for epoll instance
diff --git a/pcap.c b/pcap.c
index 2767b53..8dd647a 100644
--- a/pcap.c
+++ b/pcap.c
@@ -20,6 +20,10 @@
#include <time.h>
#include <net/ethernet.h>
#include <unistd.h>
+#include <net/if.h>
+
+#include "passt.h"
+#include "util.h"
#ifdef DEBUG
@@ -77,9 +81,9 @@ void pcap(char *pkt, size_t len)
write(pcap_fd, pkt, len);
}
-void pcap_init(void)
+void pcap_init(int sock_index)
{
- char name[] = PCAP_PREFIX PCAP_ISO8601_STR ".pcap";
+ char name[] = PCAP_PREFIX PCAP_ISO8601_STR STR(UNIX_SOCK_MAX) ".pcap";
struct timeval tv;
struct tm *tm;
@@ -88,6 +92,10 @@ void pcap_init(void)
strftime(name + strlen(PCAP_PREFIX), sizeof(PCAP_ISO8601_STR) - 1,
PCAP_ISO8601_FORMAT, tm);
+ snprintf(name + strlen(PCAP_PREFIX) + strlen(PCAP_ISO8601_STR),
+ sizeof(name) - strlen(PCAP_PREFIX) - strlen(PCAP_ISO8601_STR),
+ "_%i.pcap", sock_index);
+
pcap_fd = open(name, O_WRONLY | O_CREAT | O_APPEND | O_DSYNC,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (pcap_fd == -1) {
@@ -95,6 +103,8 @@ void pcap_init(void)
return;
}
+ info("Saving packet capture at %s", name);
+
write(pcap_fd, &pcap_hdr, sizeof(pcap_hdr));
}
diff --git a/pcap.h b/pcap.h
index 00bafee..abca097 100644
--- a/pcap.h
+++ b/pcap.h
@@ -1,2 +1,2 @@
void pcap(char *pkt, size_t len);
-void pcap_init(void);
+void pcap_init(int sock_index);
diff --git a/util.h b/util.h
index 7d0704c..3e24c9a 100644
--- a/util.h
+++ b/util.h
@@ -29,6 +29,9 @@ void debug(const char *format, ...);
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif
+#define STRINGIFY(x) #x
+#define STR(x) STRINGIFY(x)
+
#define ARRAY_SIZE(a) ((int)(sizeof(a) / sizeof((a)[0])))
#define IN_INTERVAL(a, b, x) ((x) >= (a) && (x) <= (b))
@@ -37,6 +40,10 @@ void debug(const char *format, ...);
#define PORT_IS_EPHEMERAL(port) ((port) >= (1 << 15) + (1 << 14)) /* RFC 6335 */
+#include <linux/ipv6.h>
+#include <net/if.h>
+#include <linux/ip.h>
+
uint16_t csum_fold(uint32_t sum);
uint16_t csum_ip4(void *buf, size_t len);
void csum_tcp4(struct iphdr *iph);