aboutgitcodebugslistschat
path: root/contrib/kata-containers/0001-virtcontainers-agent-Add-passt-networking-model-and-.patch
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/kata-containers/0001-virtcontainers-agent-Add-passt-networking-model-and-.patch')
-rw-r--r--contrib/kata-containers/0001-virtcontainers-agent-Add-passt-networking-model-and-.patch462
1 files changed, 462 insertions, 0 deletions
diff --git a/contrib/kata-containers/0001-virtcontainers-agent-Add-passt-networking-model-and-.patch b/contrib/kata-containers/0001-virtcontainers-agent-Add-passt-networking-model-and-.patch
new file mode 100644
index 0000000..e0dffa5
--- /dev/null
+++ b/contrib/kata-containers/0001-virtcontainers-agent-Add-passt-networking-model-and-.patch
@@ -0,0 +1,462 @@
+From e1b250fc0b5e377285db5d90476fdd2d63501191 Mon Sep 17 00:00:00 2001
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Fri, 28 Jan 2022 01:09:23 +0100
+Subject: [PATCH] virtcontainers, agent: Add passt networking model and
+ endpoint
+
+This implements draft support for user-mode networking using
+passt (https://passt.top). The corresponding networking model
+can be enabled via:
+
+ internetworking_model=passt
+
+in the [runtime] section of the TOML configuration file.
+
+The networking endpoint does essentially nothing other than
+starting and stopping passt as needed: no interfaces are configured,
+qemu connects to passt via a UNIX domain socket, and the
+corresponding command line option is appended if this networking
+model is selected.
+
+The passt instance started by the endpoint takes care of forwarding
+traffic back and forth, translating between L2 frames on the qemu
+side and native L4 sockets on the host.
+
+This network setup doesn't need elevated privileges or any kind of
+capability. However, this patch doesn't implement dropping
+privileges, as the containerd interface only allows runtimes running
+as the same user to connect to its UNIX domain socket interface,
+typically root (at least in the case of CRI-O), and root privileges
+might be needed anyway for other purposes (block devices, etc.).
+
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+---
+SPDX-FileCopyrightText: 2021-2022 Red Hat GmbH <sbrivio@redhat.com>
+SPDX-License-Identifier: Apache-2.0
+
+ src/agent/src/netlink.rs | 3 +-
+ .../kata-containers/govmm/qemu/qemu.go | 23 ++-
+ src/runtime/virtcontainers/endpoint.go | 7 +
+ src/runtime/virtcontainers/network.go | 24 +++
+ src/runtime/virtcontainers/passt_endpoint.go | 156 ++++++++++++++++++
+ .../virtcontainers/persist/api/network.go | 5 +
+ src/runtime/virtcontainers/qemu_arch_base.go | 11 ++
+ 7 files changed, 226 insertions(+), 3 deletions(-)
+ create mode 100644 src/runtime/virtcontainers/passt_endpoint.go
+
+diff --git a/src/agent/src/netlink.rs b/src/agent/src/netlink.rs
+index ed071b60..34c6df96 100644
+--- a/src/agent/src/netlink.rs
++++ b/src/agent/src/netlink.rs
+@@ -312,7 +312,8 @@ impl Handle {
+ let list = a.iter().chain(&b);
+
+ for route in list {
+- let link = self.find_link(LinkFilter::Name(&route.device)).await?;
++ // TODO: "eth0" hardcoded for passt networking model
++ let link = self.find_link(LinkFilter::Name("eth0")).await?;
+
+ const MAIN_TABLE: u8 = packet::constants::RT_TABLE_MAIN;
+ const UNICAST: u8 = packet::constants::RTN_UNICAST;
+diff --git a/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qemu.go b/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qemu.go
+index e57a4b26..1756bdfd 100644
+--- a/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qemu.go
++++ b/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qemu.go
+@@ -682,6 +682,8 @@ const (
+
+ // VHOSTUSER is a vhost-user port (socket)
+ VHOSTUSER NetDeviceType = "vhostuser"
++
++ PASST NetDeviceType = "passt"
+ )
+
+ // QemuNetdevParam converts to the QEMU -netdev parameter notation
+@@ -709,6 +711,8 @@ func (n NetDeviceType) QemuNetdevParam(netdev *NetDevice, config *Config) string
+ log.Fatal("vhost-user devices are not supported on IBM Z")
+ }
+ return "vhost-user" // -netdev type=vhost-user (no device)
++ case PASST:
++ return "socket" // -netdev type=socket,connect=...
+ default:
+ return ""
+
+@@ -742,6 +746,8 @@ func (n NetDeviceType) QemuDeviceParam(netdev *NetDevice, config *Config) Device
+ log.Fatal("vhost-user devices are not supported on IBM Z")
+ }
+ return "" // -netdev type=vhost-user (no device)
++ case PASST:
++ device = "virtio-net"
+ default:
+ return ""
+ }
+@@ -806,6 +812,8 @@ type NetDevice struct {
+
+ // Transport is the virtio transport for this device.
+ Transport VirtioTransport
++
++ SocketPath string
+ }
+
+ // VirtioNetTransport is a map of the virtio-net device name that corresponds
+@@ -818,6 +826,10 @@ var VirtioNetTransport = map[VirtioTransport]string{
+
+ // Valid returns true if the NetDevice structure is valid and complete.
+ func (netdev NetDevice) Valid() bool {
++ if netdev.Type == PASST {
++ return true
++ }
++
+ if netdev.ID == "" || netdev.IFName == "" {
+ return false
+ }
+@@ -867,7 +879,9 @@ func (netdev NetDevice) QemuDeviceParams(config *Config) []string {
+
+ deviceParams = append(deviceParams, fmt.Sprintf("driver=%s", driver))
+ deviceParams = append(deviceParams, fmt.Sprintf("netdev=%s", netdev.ID))
+- deviceParams = append(deviceParams, fmt.Sprintf("mac=%s", netdev.MACAddress))
++ if netdev.MACAddress != "" {
++ deviceParams = append(deviceParams, fmt.Sprintf("mac=%s", netdev.MACAddress))
++ }
+
+ if netdev.Bus != "" {
+ deviceParams = append(deviceParams, fmt.Sprintf("bus=%s", netdev.Bus))
+@@ -937,7 +951,12 @@ func (netdev NetDevice) QemuNetdevParams(config *Config) []string {
+ netdevParams = append(netdevParams, fmt.Sprintf("fds=%s", strings.Join(fdParams, ":")))
+
+ } else {
+- netdevParams = append(netdevParams, fmt.Sprintf("ifname=%s", netdev.IFName))
++ if netdev.IFName != "" {
++ netdevParams = append(netdevParams, fmt.Sprintf("ifname=%s", netdev.IFName))
++ }
++ if netdev.SocketPath != "" {
++ netdevParams = append(netdevParams, fmt.Sprintf("connect=%s", netdev.SocketPath))
++ }
+ if netdev.DownScript != "" {
+ netdevParams = append(netdevParams, fmt.Sprintf("downscript=%s", netdev.DownScript))
+ }
+diff --git a/src/runtime/virtcontainers/endpoint.go b/src/runtime/virtcontainers/endpoint.go
+index 7786bb3e..e167304a 100644
+--- a/src/runtime/virtcontainers/endpoint.go
++++ b/src/runtime/virtcontainers/endpoint.go
+@@ -65,6 +65,8 @@ const (
+
+ // IPVlanEndpointType is ipvlan network interface.
+ IPVlanEndpointType EndpointType = "ipvlan"
++
++ PasstEndpointType EndpointType = "passt"
+ )
+
+ // Set sets an endpoint type based on the input string.
+@@ -94,6 +96,9 @@ func (endpointType *EndpointType) Set(value string) error {
+ case "ipvlan":
+ *endpointType = IPVlanEndpointType
+ return nil
++ case "passt":
++ *endpointType = PasstEndpointType
++ return nil
+ default:
+ return fmt.Errorf("Unknown endpoint type %s", value)
+ }
+@@ -118,6 +123,8 @@ func (endpointType *EndpointType) String() string {
+ return string(TuntapEndpointType)
+ case IPVlanEndpointType:
+ return string(IPVlanEndpointType)
++ case PasstEndpointType:
++ return string(PasstEndpointType)
+ default:
+ return ""
+ }
+diff --git a/src/runtime/virtcontainers/network.go b/src/runtime/virtcontainers/network.go
+index e6c681da..2de692fe 100644
+--- a/src/runtime/virtcontainers/network.go
++++ b/src/runtime/virtcontainers/network.go
+@@ -57,6 +57,9 @@ const (
+ // NetXConnectNoneModel can be used when the VM is in the host network namespace
+ NetXConnectNoneModel
+
++ // passt in namespace connecting hypervisor via host sockets
++ NetXConnectPasstModel
++
+ // NetXConnectInvalidModel is the last item to Check valid values by IsValid()
+ NetXConnectInvalidModel
+ )
+@@ -73,6 +76,8 @@ const (
+
+ tcFilterNetModelStr = "tcfilter"
+
++ passtNetModelStr = "passt"
++
+ noneNetModelStr = "none"
+ )
+
+@@ -85,6 +90,8 @@ func (n *NetInterworkingModel) GetModel() string {
+ return macvtapNetModelStr
+ case NetXConnectTCFilterModel:
+ return tcFilterNetModelStr
++ case NetXConnectPasstModel:
++ return passtNetModelStr
+ case NetXConnectNoneModel:
+ return noneNetModelStr
+ }
+@@ -103,6 +110,9 @@ func (n *NetInterworkingModel) SetModel(modelName string) error {
+ case tcFilterNetModelStr:
+ *n = NetXConnectTCFilterModel
+ return nil
++ case passtNetModelStr:
++ *n = NetXConnectPasstModel
++ return nil
+ case noneNetModelStr:
+ *n = NetXConnectNoneModel
+ return nil
+@@ -254,6 +264,8 @@ func getLinkForEndpoint(endpoint Endpoint, netHandle *netlink.Handle) (netlink.L
+ link = &netlink.IPVlan{}
+ case *TuntapEndpoint:
+ link = &netlink.Tuntap{}
++ case *PasstEndpoint:
++ return nil, nil
+ default:
+ return nil, fmt.Errorf("Unexpected endpointType %s", ep.Type())
+ }
+@@ -302,6 +314,11 @@ func xConnectVMNetwork(ctx context.Context, endpoint Endpoint, h Hypervisor) err
+ span, ctx := networkTrace(ctx, "xConnectVMNetwork", endpoint)
+ defer closeSpan(span, err)
+
++ if endpoint.Type() == PasstEndpointType {
++ networkLogger().Info("VM network via passt user-mode networking")
++ return nil
++ }
++
+ netPair := endpoint.NetworkPair()
+
+ queues := 0
+@@ -347,6 +364,7 @@ func xDisconnectVMNetwork(ctx context.Context, endpoint Endpoint) error {
+ err = untapNetworkPair(ctx, endpoint)
+ case NetXConnectTCFilterModel:
+ err = removeTCFiltering(ctx, endpoint)
++ case NetXConnectPasstModel:
+ default:
+ err = fmt.Errorf("Invalid internetworking model")
+ }
+@@ -1095,6 +1113,12 @@ func createEndpoint(netInfo NetworkInfo, idx int, model NetInterworkingModel, li
+ // an appropriate EndPoint based on interface type
+ // This should be a switch
+
++ if model == NetXConnectPasstModel {
++ networkLogger().Info("creating passt endpoint")
++ endpoint, err := createPasstNetworkEndpoint(idx)
++ return endpoint, err
++ }
++
+ // Check if interface is a physical interface. Do not create
+ // tap interface/bridge if it is.
+ isPhysical, err := isPhysicalIface(netInfo.Iface.Name)
+diff --git a/src/runtime/virtcontainers/passt_endpoint.go b/src/runtime/virtcontainers/passt_endpoint.go
+new file mode 100644
+index 00000000..7f40135a
+--- /dev/null
++++ b/src/runtime/virtcontainers/passt_endpoint.go
+@@ -0,0 +1,156 @@
++// SPDX-License-Identifier: Apache-2.0
++//
++// passt_endpoint.go - passt endpoint for Kata Containers: start and stop passt
++//
++// Copyright (c) 2021-2022 Red Hat GmbH
++// Author: Stefano Brivio <sbrivio@redhat.com>
++
++package virtcontainers
++
++import (
++ "context"
++ "fmt"
++ "os"
++ "os/exec"
++ "syscall"
++
++ persistapi "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/api"
++ vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
++)
++
++type PasstEndpoint struct {
++ EndpointType EndpointType
++ EndpointProperties NetworkInfo
++ PCIPath vcTypes.PciPath
++ PasstPID int
++}
++
++func createPasstNetworkEndpoint(idx int) (*PasstEndpoint, error) {
++ if idx < 0 {
++ return &PasstEndpoint{}, fmt.Errorf("invalid network endpoint index: %d", idx)
++ }
++
++ cmd := exec.Command("passt",
++ "-P", fmt.Sprintf("/tmp/kata-passt-%d.pid", idx),
++ "-s", fmt.Sprintf("/tmp/kata-passt-%d.socket", idx))
++ err := cmd.Run()
++ if err != nil {
++ return &PasstEndpoint{}, fmt.Errorf("passt failed to start: %v", err)
++ }
++
++ in, err := os.Open(fmt.Sprintf("/tmp/kata-passt-%d.pid", idx))
++ if err != nil {
++ return &PasstEndpoint{}, fmt.Errorf("Failed to read passt PID: %v", err)
++ }
++ defer in.Close()
++
++ var pid int
++ _, err = fmt.Fscanf(in, "%d", &pid)
++ if err != nil {
++ return &PasstEndpoint{}, fmt.Errorf("Failed to read passt pid: %v", err)
++ }
++
++ endpoint := &PasstEndpoint{
++ EndpointType: PasstEndpointType,
++ PasstPID: pid,
++ }
++
++ return endpoint, nil
++}
++
++func (endpoint *PasstEndpoint) Properties() NetworkInfo {
++ return endpoint.EndpointProperties
++}
++
++func (endpoint *PasstEndpoint) Type() EndpointType {
++ return endpoint.EndpointType
++}
++
++// unsupported
++func (endpoint *PasstEndpoint) HardwareAddr() string {
++ return "00:11:22:33:44:55"
++}
++
++// unsupported
++func (endpoint *PasstEndpoint) Name() string {
++ return ""
++}
++
++// unsupported
++func (endpoint *PasstEndpoint) NetworkPair() *NetworkInterfacePair {
++ return nil
++}
++
++// PciPath returns the PCI path of the endpoint.
++func (endpoint *PasstEndpoint) PciPath() vcTypes.PciPath {
++ return endpoint.PCIPath
++}
++
++// useless
++func (endpoint *PasstEndpoint) SetPciPath(pciPath vcTypes.PciPath) {
++ endpoint.PCIPath = pciPath
++}
++
++func (endpoint *PasstEndpoint) SetProperties(properties NetworkInfo) {
++ endpoint.EndpointProperties = properties
++}
++
++func (endpoint *PasstEndpoint) Attach(ctx context.Context, s *Sandbox) error {
++ h := s.hypervisor
++ if err := xConnectVMNetwork(ctx, endpoint, h); err != nil {
++ networkLogger().WithError(err).Error("Error attaching passt endpoint")
++ return err
++ }
++
++ return h.AddDevice(ctx, endpoint, NetDev)
++}
++
++func (endpoint *PasstEndpoint) Detach(ctx context.Context, netNsCreated bool, netNsPath string) error {
++ syscall.Kill(endpoint.PasstPID, syscall.SIGQUIT)
++
++ return nil
++}
++
++func (endpoint *PasstEndpoint) HotAttach(ctx context.Context, h Hypervisor) error {
++ return fmt.Errorf("HotAttach not supported by PasstEndpoint")
++}
++
++func (endpoint *PasstEndpoint) HotDetach(ctx context.Context, h Hypervisor, netNsCreated bool, netNsPath string) error {
++ return fmt.Errorf("HotDetatch not supported by PasstEndpoint")
++}
++
++func (endpoint *PasstEndpoint) save() persistapi.NetworkEndpoint {
++ return persistapi.NetworkEndpoint{
++ Type: string(endpoint.Type()),
++
++ Passt: &persistapi.PasstEndpoint{
++ PasstPID: endpoint.PasstPID,
++ },
++ }
++}
++
++func (endpoint *PasstEndpoint) load(s persistapi.NetworkEndpoint) {
++ endpoint.EndpointType = PasstEndpointType
++
++ if s.Passt != nil {
++ endpoint.PasstPID = s.Passt.PasstPID
++ }
++}
++
++// unsupported
++func (endpoint *PasstEndpoint) GetRxRateLimiter() bool {
++ return false
++}
++
++func (endpoint *PasstEndpoint) SetRxRateLimiter() error {
++ return fmt.Errorf("rx rate limiter is unsupported for physical endpoint")
++}
++
++// unsupported
++func (endpoint *PasstEndpoint) GetTxRateLimiter() bool {
++ return false
++}
++
++func (endpoint *PasstEndpoint) SetTxRateLimiter() error {
++ return fmt.Errorf("tx rate limiter is unsupported for physical endpoint")
++}
+diff --git a/src/runtime/virtcontainers/persist/api/network.go b/src/runtime/virtcontainers/persist/api/network.go
+index 51c3aac6..79d77cd9 100644
+--- a/src/runtime/virtcontainers/persist/api/network.go
++++ b/src/runtime/virtcontainers/persist/api/network.go
+@@ -79,6 +79,10 @@ type VhostUserEndpoint struct {
+ PCIPath vcTypes.PciPath
+ }
+
++type PasstEndpoint struct {
++ PasstPID int
++}
++
+ // NetworkEndpoint contains network interface information
+ type NetworkEndpoint struct {
+ // One and only one of these below are not nil according to Type.
+@@ -90,6 +94,7 @@ type NetworkEndpoint struct {
+ Tap *TapEndpoint `json:",omitempty"`
+ IPVlan *IPVlanEndpoint `json:",omitempty"`
+ Tuntap *TuntapEndpoint `json:",omitempty"`
++ Passt *PasstEndpoint `json:",omitempty"`
+
+ Type string
+ }
+diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go
+index 97cd6eb8..9ace0ace 100644
+--- a/src/runtime/virtcontainers/qemu_arch_base.go
++++ b/src/runtime/virtcontainers/qemu_arch_base.go
+@@ -615,6 +615,17 @@ func genericNetwork(endpoint Endpoint, vhost, nestedRun bool, index int) (govmmQ
+ FDs: netPair.VMFds,
+ VhostFDs: netPair.VhostFds,
+ }
++ case *PasstEndpoint:
++ d = govmmQemu.NetDevice{
++ Type: govmmQemu.PASST,
++ Driver: govmmQemu.VirtioNet,
++ ID: fmt.Sprintf("network-%d", index),
++ // TODO: Drop hardcoded MAC address, passt endpoint
++ // doesn't need to know it
++ MACAddress: "00:11:22:33:44:55",
++ DisableModern: nestedRun,
++ SocketPath: fmt.Sprintf("/tmp/kata-passt-%d.socket", index),
++ }
+ default:
+ return govmmQemu.NetDevice{}, fmt.Errorf("Unknown type for endpoint")
+ }
+--
+2.28.0
+