From 8bd53d8b9c3043047e79626efbe3ac7bd545e27c Mon Sep 17 00:00:00 2001 From: Christophe Fergeau Date: Tue, 16 Sep 2025 11:50:16 +0200 Subject: [PATCH 1/7] test-mac: Add ping tests This will help verify https://github.com/containers/gvisor-tap-vsock/issues/428 is really fixed. Signed-off-by: Christophe Fergeau --- test-vfkit/basic_test.go | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test-vfkit/basic_test.go b/test-vfkit/basic_test.go index b883003c..8bbe9349 100644 --- a/test-vfkit/basic_test.go +++ b/test-vfkit/basic_test.go @@ -16,6 +16,7 @@ import ( e2e "github.com/containers/gvisor-tap-vsock/test" "github.com/onsi/ginkgo" "github.com/onsi/gomega" + log "github.com/sirupsen/logrus" ) var _ = ginkgo.Describe("connectivity with vfkit", func() { @@ -102,3 +103,25 @@ var _ = ginkgo.Describe("upload and download with vfkit", func() { tmpDir = dlTmpDir }) }) +var _ = ginkgo.Describe("ping with gvproxy and vfkit", func() { + ginkgo.It("should succeed to ping a known domain", func() { + out, err := sshExec("ping -w2 crc.dev") + log.Infof("ping: %s", out) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should fail to ping an unknown domain", func() { + out, err := sshExec("ping -w2 unknown.crc.dev") + log.Infof("ping: %s", out) + gomega.Expect(err).To(gomega.HaveOccurred()) + }) + ginkgo.It("should succeed to ping a known IP", func() { + out, err := sshExec("ping -w2 1.1.1.1") + log.Infof("ping: %s", out) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should fail to ping an unknown IP", func() { + out, err := sshExec("ping -w2 7.7.7.7") + log.Infof("ping: %s", out) + gomega.Expect(err).To(gomega.HaveOccurred()) + }) +}) From 372f6603546fff2558f9de8e212e716fb539e4d2 Mon Sep 17 00:00:00 2001 From: Yevhen Vydolob Date: Fri, 26 Dec 2025 10:01:47 +0200 Subject: [PATCH 2/7] Update gvisor Signed-off-by: Yevhen Vydolob --- go.mod | 7 +- go.sum | 14 +- pkg/services/forwarder/udp.go | 7 +- 
vendor/golang.org/x/exp/LICENSE | 27 + vendor/golang.org/x/exp/PATENTS | 22 + .../x/exp/constraints/constraints.go | 50 + vendor/golang.org/x/time/LICENSE | 4 +- vendor/golang.org/x/time/rate/rate.go | 45 +- vendor/golang.org/x/time/rate/sometimes.go | 4 +- vendor/gvisor.dev/gvisor/LICENSE | 32 +- vendor/gvisor.dev/gvisor/pkg/bits/bits32.go | 33 - vendor/gvisor.dev/gvisor/pkg/bits/bits64.go | 33 - .../gvisor/pkg/bits/bits_template.go | 51 + .../gvisor.dev/gvisor/pkg/bits/uint64_arch.go | 2 +- vendor/gvisor.dev/gvisor/pkg/buffer/buffer.go | 7 +- .../gvisor/pkg/buffer/buffer_state_autogen.go | 4 +- .../gvisor/pkg/buffer/view_unsafe.go | 4 +- .../gvisor.dev/gvisor/pkg/context/context.go | 82 +- vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid.go | 7 +- .../gvisor/pkg/cpuid/cpuid_amd64.go | 14 + .../pkg/cpuid/cpuid_amd64_state_autogen.go | 8 +- .../gvisor/pkg/cpuid/cpuid_arm64.go | 44 + .../pkg/cpuid/cpuid_arm64_state_autogen.go | 4 +- .../goid_122_amd64.s => cpuid/hwcap_amd64.go} | 20 +- .../gvisor/pkg/cpuid/hwcap_arm64.go | 79 ++ .../gvisor/pkg/cpuid/native_amd64.go | 51 +- .../gvisor/pkg/cpuid/native_arm64.go | 12 +- .../gvisor/pkg/cpuid/static_amd64.go | 8 +- .../gvisor/pkg/goid/goid_122_arm64.s | 26 - ...goid_123_amd64.s => goid_asm_impl_amd64.s} | 4 +- ...goid_123_arm64.s => goid_asm_impl_arm64.s} | 4 +- vendor/gvisor.dev/gvisor/pkg/log/bug.go | 161 +++ vendor/gvisor.dev/gvisor/pkg/log/file.go | 60 ++ vendor/gvisor.dev/gvisor/pkg/log/log.go | 4 +- vendor/gvisor.dev/gvisor/pkg/refs/refs_map.go | 4 +- .../pkg/sleep/sleep_unsafe_state_autogen.go | 8 +- .../gvisor.dev/gvisor/pkg/state/addr_set.go | 95 +- vendor/gvisor.dev/gvisor/pkg/state/decode.go | 105 +- vendor/gvisor.dev/gvisor/pkg/state/encode.go | 11 +- .../state/{complete_list.go => ods_list.go} | 84 +- vendor/gvisor.dev/gvisor/pkg/state/state.go | 2 +- vendor/gvisor.dev/gvisor/pkg/state/stats.go | 12 +- vendor/gvisor.dev/gvisor/pkg/state/types.go | 34 +- vendor/gvisor.dev/gvisor/pkg/sync/aliases.go | 3 +- 
.../gvisor/pkg/sync/checklocks_off_unsafe.go | 3 +- .../gvisor/pkg/sync/checklocks_on_unsafe.go | 3 +- .../gvisor.dev/gvisor/pkg/sync/gate_unsafe.go | 9 +- .../gvisor/pkg/sync/goyield_go113_unsafe.go | 3 +- .../gvisor/pkg/sync/goyield_unsafe.go | 3 +- .../gvisor/pkg/sync/mutex_unsafe.go | 3 +- .../gvisor/pkg/sync/norace_unsafe.go | 3 +- .../gvisor.dev/gvisor/pkg/sync/race_unsafe.go | 3 +- .../gvisor/pkg/sync/runtime_amd64.go | 3 +- ...stants.go => runtime_constants_go_impl.go} | 6 +- ...cer2.go => runtime_exectracer2_go_impl.go} | 4 +- .../gvisor/pkg/sync/runtime_go121_unsafe.go | 5 +- .../gvisor/pkg/sync/runtime_go124_unsafe.go | 16 + .../gvisor/pkg/sync/runtime_go126_unsafe.go | 16 + .../pkg/sync/runtime_not_go121_unsafe.go | 3 +- .../gvisor/pkg/sync/runtime_other.go | 3 +- ...64.s => runtime_spinning_asm_impl_amd64.s} | 2 +- .../sync/runtime_spinning_asm_impl_arm64.s | 18 + .../gvisor/pkg/sync/runtime_spinning_other.s | 8 +- .../gvisor/pkg/sync/runtime_unsafe.go | 3 +- .../gvisor/pkg/sync/rwmutex_unsafe.go | 4 +- vendor/gvisor.dev/gvisor/pkg/sync/seqcount.go | 3 +- vendor/gvisor.dev/gvisor/pkg/sync/sync.go | 3 +- .../gvisor/pkg/tcpip/adapters/gonet/gonet.go | 2 +- vendor/gvisor.dev/gvisor/pkg/tcpip/errors.go | 18 + .../gvisor/pkg/tcpip/header/ipv4.go | 65 ++ .../tcpip/header/ipv6_extension_headers.go | 78 +- .../gvisor.dev/gvisor/pkg/tcpip/header/tcp.go | 11 +- .../gvisor/pkg/tcpip/header/virtionet.go | 8 +- .../gvisor/pkg/tcpip/link/nested/nested.go | 5 + .../gvisor/pkg/tcpip/link/sniffer/sniffer.go | 10 +- .../gvisor/pkg/tcpip/network/arp/arp.go | 5 + .../internal/fragmentation/reassembler.go | 4 +- .../ip/duplicate_address_detection.go | 2 +- .../internal/ip/generic_multicast_protocol.go | 6 +- .../network/internal/ip/ip_state_autogen.go | 23 +- .../pkg/tcpip/network/internal/ip/stats.go | 9 +- .../multicast/multicast_state_autogen.go | 11 +- .../network/internal/multicast/route_table.go | 2 +- .../gvisor/pkg/tcpip/network/ipv4/icmp.go | 49 +- 
.../gvisor/pkg/tcpip/network/ipv4/ipv4.go | 130 ++- .../gvisor.dev/gvisor/pkg/tcpip/socketops.go | 15 + .../pkg/tcpip/stack/address_state_mutex.go | 2 +- .../tcpip/stack/addressable_endpoint_state.go | 6 +- .../stack/addressable_endpoint_state_mutex.go | 2 +- .../gvisor/pkg/tcpip/stack/bridge.go | 94 +- .../gvisor/pkg/tcpip/stack/bridge_mutex.go | 2 +- .../gvisor/pkg/tcpip/stack/bucket_mutex.go | 2 +- .../tcpip/stack/cleanup_endpoints_mutex.go | 2 +- .../gvisor/pkg/tcpip/stack/conn_mutex.go | 2 +- .../pkg/tcpip/stack/conn_track_mutex.go | 2 +- .../pkg/tcpip/stack/endpoints_by_nic_mutex.go | 2 +- .../pkg/tcpip/stack/headertype_string.go | 2 +- .../gvisor/pkg/tcpip/stack/iptables.go | 89 +- .../gvisor/pkg/tcpip/stack/iptables_mutex.go | 2 +- .../pkg/tcpip/stack/iptables_targets.go | 20 + .../gvisor/pkg/tcpip/stack/iptables_types.go | 6 +- .../tcpip/stack/multi_port_endpoint_mutex.go | 2 +- .../pkg/tcpip/stack/neighbor_cache_mutex.go | 2 +- .../gvisor/pkg/tcpip/stack/neighbor_entry.go | 44 +- .../pkg/tcpip/stack/neighbor_entry_mutex.go | 2 +- .../gvisor/pkg/tcpip/stack/nftables_types.go | 170 +++ .../gvisor.dev/gvisor/pkg/tcpip/stack/nic.go | 38 +- .../gvisor/pkg/tcpip/stack/nic_mutex.go | 2 +- .../gvisor/pkg/tcpip/stack/packet_buffer.go | 50 + .../tcpip/stack/packet_endpoint_list_mutex.go | 2 +- .../pkg/tcpip/stack/packet_eps_mutex.go | 2 +- .../packets_pending_link_resolution_mutex.go | 2 +- .../gvisor/pkg/tcpip/stack/pending_packets.go | 7 +- .../gvisor/pkg/tcpip/stack/registration.go | 84 +- .../gvisor/pkg/tcpip/stack/route.go | 16 +- .../gvisor/pkg/tcpip/stack/route_mutex.go | 2 +- .../pkg/tcpip/stack/route_stack_mutex.go | 2 +- .../gvisor/pkg/tcpip/stack/save_restore.go | 54 + .../gvisor/pkg/tcpip/stack/stack.go | 457 +++++--- .../gvisor/pkg/tcpip/stack/stack_mutex.go | 2 +- .../pkg/tcpip/stack/stack_state_autogen.go | 994 +++++------------- .../pkg/tcpip/stack/state_conn_mutex.go | 2 +- .../tcpip/stack/transport_endpoints_mutex.go | 2 +- 
.../gvisor.dev/gvisor/pkg/tcpip/stdclock.go | 1 - vendor/gvisor.dev/gvisor/pkg/tcpip/tcpip.go | 60 +- .../gvisor/pkg/tcpip/tcpip_state_autogen.go | 129 ++- vendor/gvisor.dev/gvisor/pkg/tcpip/timer.go | 2 +- .../pkg/tcpip/transport/icmp/endpoint.go | 2 +- .../tcpip/transport/icmp/endpoint_state.go | 22 +- .../transport/icmp/icmp_state_autogen.go | 51 +- .../pkg/tcpip/transport/icmp/protocol.go | 3 + .../transport/internal/network/endpoint.go | 152 ++- .../internal/network/endpoint_state.go | 20 +- .../internal/network/network_state_autogen.go | 81 +- .../pkg/tcpip/transport/packet/endpoint.go | 174 ++- .../tcpip/transport/packet/endpoint_mutex.go | 96 ++ .../transport/packet/endpoint_rcv_mutex.go | 64 ++ .../tcpip/transport/packet/endpoint_state.go | 19 +- .../transport/packet/last_error_mutex.go | 64 ++ .../transport/packet/packet_mmap_mutex.go | 96 ++ .../transport/packet/packet_state_autogen.go | 64 +- .../pkg/tcpip/transport/raw/endpoint.go | 2 +- .../pkg/tcpip/transport/raw/endpoint_state.go | 22 +- .../tcpip/transport/raw/raw_state_autogen.go | 55 +- .../gvisor/pkg/tcpip/transport/tcp/accept.go | 20 +- .../pkg/tcpip/transport/tcp/accept_mutex.go | 64 ++ .../gvisor/pkg/tcpip/transport/tcp/connect.go | 188 ++-- .../pkg/tcpip/transport/tcp/connect_unsafe.go | 3 +- .../gvisor/pkg/tcpip/transport/tcp/cubic.go | 22 +- .../pkg/tcpip/transport/tcp/dispatcher.go | 40 +- .../tcpip/transport/tcp/dispatcher_mutex.go | 64 ++ .../pkg/tcpip/transport/tcp/endpoint.go | 176 ++-- .../pkg/tcpip/transport/tcp/endpoint_state.go | 240 +++-- .../pkg/tcpip/transport/tcp/ep_queue_mutex.go | 64 ++ .../pkg/tcpip/transport/tcp/forwarder.go | 14 +- .../tcpip/transport/tcp/forwarder_mutex.go | 64 ++ .../transport/tcp/forwarder_request_mutex.go | 64 ++ .../pkg/tcpip/transport/tcp/hasher_mutex.go | 64 ++ .../tcpip/transport/tcp/keepalive_mutex.go | 64 ++ .../tcpip/transport/tcp/last_error_mutex.go | 64 ++ .../transport/tcp/pending_processing_mutex.go | 64 ++ 
.../pkg/tcpip/transport/tcp/protocol.go | 59 +- .../pkg/tcpip/transport/tcp/protocol_mutex.go | 96 ++ .../gvisor/pkg/tcpip/transport/tcp/rack.go | 11 +- .../gvisor/pkg/tcpip/transport/tcp/rcv.go | 10 +- .../tcpip/transport/tcp/rcv_queue_mutex.go | 64 ++ .../gvisor/pkg/tcpip/transport/tcp/reno.go | 12 + .../pkg/tcpip/transport/tcp/rtt_mutex.go | 64 ++ .../tcpip/transport/tcp/sack_scoreboard.go | 69 +- .../gvisor/pkg/tcpip/transport/tcp/segment.go | 2 +- .../pkg/tcpip/transport/tcp/segment_queue.go | 8 +- .../transport/tcp/segment_queue_mutex.go | 64 ++ .../gvisor/pkg/tcpip/transport/tcp/snd.go | 161 ++- .../tcpip/transport/tcp/snd_queue_mutex.go | 64 ++ .../{stack/tcp.go => transport/tcp/state.go} | 16 +- .../tcpip/transport/tcp/tcp_state_autogen.go | 851 +++++++++++++-- .../pkg/tcpip/transport/udp/endpoint.go | 24 +- .../pkg/tcpip/transport/udp/endpoint_state.go | 26 +- .../pkg/tcpip/transport/udp/forwarder.go | 28 +- .../pkg/tcpip/transport/udp/protocol.go | 3 + .../tcpip/transport/udp/udp_state_autogen.go | 75 +- vendor/gvisor.dev/gvisor/pkg/waiter/waiter.go | 47 +- vendor/modules.txt | 11 +- 183 files changed, 6020 insertions(+), 2248 deletions(-) create mode 100644 vendor/golang.org/x/exp/LICENSE create mode 100644 vendor/golang.org/x/exp/PATENTS create mode 100644 vendor/golang.org/x/exp/constraints/constraints.go delete mode 100644 vendor/gvisor.dev/gvisor/pkg/bits/bits32.go delete mode 100644 vendor/gvisor.dev/gvisor/pkg/bits/bits64.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/bits/bits_template.go rename vendor/gvisor.dev/gvisor/pkg/{goid/goid_122_amd64.s => cpuid/hwcap_amd64.go} (67%) create mode 100644 vendor/gvisor.dev/gvisor/pkg/cpuid/hwcap_arm64.go delete mode 100644 vendor/gvisor.dev/gvisor/pkg/goid/goid_122_arm64.s rename vendor/gvisor.dev/gvisor/pkg/goid/{goid_123_amd64.s => goid_asm_impl_amd64.s} (91%) rename vendor/gvisor.dev/gvisor/pkg/goid/{goid_123_arm64.s => goid_asm_impl_arm64.s} (91%) create mode 100644 
vendor/gvisor.dev/gvisor/pkg/log/bug.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/log/file.go rename vendor/gvisor.dev/gvisor/pkg/state/{complete_list.go => ods_list.go} (60%) rename vendor/gvisor.dev/gvisor/pkg/sync/{runtime_constants.go => runtime_constants_go_impl.go} (75%) rename vendor/gvisor.dev/gvisor/pkg/sync/{runtime_exectracer2.go => runtime_exectracer2_go_impl.go} (81%) create mode 100644 vendor/gvisor.dev/gvisor/pkg/sync/runtime_go124_unsafe.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/sync/runtime_go126_unsafe.go rename vendor/gvisor.dev/gvisor/pkg/sync/{runtime_spinning_amd64.s => runtime_spinning_asm_impl_amd64.s} (91%) create mode 100644 vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_asm_impl_arm64.s create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nftables_types.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/stack/save_restore.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint_rcv_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/last_error_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/packet_mmap_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/accept_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/dispatcher_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/ep_queue_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/forwarder_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/forwarder_request_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/hasher_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/keepalive_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/last_error_mutex.go create mode 100644 
vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/pending_processing_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/protocol_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rcv_queue_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rtt_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/segment_queue_mutex.go create mode 100644 vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/snd_queue_mutex.go rename vendor/gvisor.dev/gvisor/pkg/tcpip/{stack/tcp.go => transport/tcp/state.go} (97%) diff --git a/go.mod b/go.mod index 5d654cfd..ba33f6ac 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/containers/gvisor-tap-vsock -go 1.25.0 +go 1.25.5 require ( github.com/Microsoft/go-winio v0.6.2 @@ -28,7 +28,7 @@ require ( golang.org/x/sync v0.20.0 golang.org/x/sys v0.43.0 gopkg.in/yaml.v3 v3.0.1 - gvisor.dev/gvisor v0.0.0-20240916094835-a174eb65023f + gvisor.dev/gvisor v0.0.0-20260413194555-9680d69bf798 ) require ( @@ -44,9 +44,10 @@ require ( github.com/u-root/uio v0.0.0-20240224005618-d2acac8f3701 // indirect github.com/vishvananda/netns v0.0.5 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa // indirect golang.org/x/net v0.52.0 // indirect golang.org/x/text v0.36.0 // indirect - golang.org/x/time v0.5.0 // indirect + golang.org/x/time v0.12.0 // indirect golang.org/x/tools v0.43.0 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect ) diff --git a/go.sum b/go.sum index 624dcd44..8e5cccee 100644 --- a/go.sum +++ b/go.sum @@ -112,6 +112,8 @@ golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf golang.org/x/crypto v0.15.0/go.mod h1:4ChreQoLWfG3xLDer1WdlH5NdlQ3+mwnQq1YTKY+72g= golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI= golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q= +golang.org/x/exp 
v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ= +golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -185,8 +187,8 @@ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= -golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= -golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= @@ -207,8 +209,8 @@ google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQ google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= -google.golang.org/protobuf v1.36.7/go.mod 
h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= @@ -219,5 +221,5 @@ gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gvisor.dev/gvisor v0.0.0-20240916094835-a174eb65023f h1:O2w2DymsOlM/nv2pLNWCMCYOldgBBMkD7H0/prN5W2k= -gvisor.dev/gvisor v0.0.0-20240916094835-a174eb65023f/go.mod h1:sxc3Uvk/vHcd3tj7/DHVBoR5wvWT/MmRq2pj7HRJnwU= +gvisor.dev/gvisor v0.0.0-20260413194555-9680d69bf798 h1:7h21uRYtzPJjXp9W/BZLx68z8TwW8+h5BPdI4P5ZkJ0= +gvisor.dev/gvisor v0.0.0-20260413194555-9680d69bf798/go.mod h1:xQ2PWgHmWJA/Ph4i1q1jBm39BKhc3W0DXqWoDSyuBOY= diff --git a/pkg/services/forwarder/udp.go b/pkg/services/forwarder/udp.go index 0ffdf044..2016062d 100644 --- a/pkg/services/forwarder/udp.go +++ b/pkg/services/forwarder/udp.go @@ -15,11 +15,11 @@ import ( ) func UDP(s *stack.Stack, nat map[tcpip.Address]tcpip.Address, natLock *sync.Mutex) *udp.Forwarder { - return udp.NewForwarder(s, func(r *udp.ForwarderRequest) { + return udp.NewForwarder(s, func(r *udp.ForwarderRequest) bool { localAddress := r.ID().LocalAddress if linkLocal().Contains(localAddress) || localAddress == header.IPv4Broadcast { - return + return true } natLock.Lock() @@ -37,7 +37,7 @@ func UDP(s *stack.Stack, nat map[tcpip.Address]tcpip.Address, natLock *sync.Mute } else { log.Errorf("r.CreateEndpoint() = 
%v", tcpErr) } - return + return false } p, _ := NewUDPProxy(&autoStoppingListener{underlying: gonet.NewUDPConn(&wq, ep)}, func() (net.Conn, error) { @@ -51,5 +51,6 @@ func UDP(s *stack.Stack, nat map[tcpip.Address]tcpip.Address, natLock *sync.Mute // forwarder request. ep.Close() }() + return true }) } diff --git a/vendor/golang.org/x/exp/LICENSE b/vendor/golang.org/x/exp/LICENSE new file mode 100644 index 00000000..6a66aea5 --- /dev/null +++ b/vendor/golang.org/x/exp/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/vendor/golang.org/x/exp/PATENTS b/vendor/golang.org/x/exp/PATENTS new file mode 100644 index 00000000..73309904 --- /dev/null +++ b/vendor/golang.org/x/exp/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the Go project. + +Google hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer and otherwise run, modify and propagate the contents of this +implementation of Go, where such license applies only to those patent +claims, both currently owned or controlled by Google and acquired in +the future, licensable by Google that are necessarily infringed by this +implementation of Go. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of Go or any code incorporated within this +implementation of Go constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of Go +shall terminate as of the date such litigation is filed. diff --git a/vendor/golang.org/x/exp/constraints/constraints.go b/vendor/golang.org/x/exp/constraints/constraints.go new file mode 100644 index 00000000..2c033dff --- /dev/null +++ b/vendor/golang.org/x/exp/constraints/constraints.go @@ -0,0 +1,50 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// Package constraints defines a set of useful constraints to be used +// with type parameters. +package constraints + +// Signed is a constraint that permits any signed integer type. +// If future releases of Go add new predeclared signed integer types, +// this constraint will be modified to include them. +type Signed interface { + ~int | ~int8 | ~int16 | ~int32 | ~int64 +} + +// Unsigned is a constraint that permits any unsigned integer type. +// If future releases of Go add new predeclared unsigned integer types, +// this constraint will be modified to include them. +type Unsigned interface { + ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr +} + +// Integer is a constraint that permits any integer type. +// If future releases of Go add new predeclared integer types, +// this constraint will be modified to include them. +type Integer interface { + Signed | Unsigned +} + +// Float is a constraint that permits any floating-point type. +// If future releases of Go add new predeclared floating-point types, +// this constraint will be modified to include them. +type Float interface { + ~float32 | ~float64 +} + +// Complex is a constraint that permits any complex numeric type. +// If future releases of Go add new predeclared complex numeric types, +// this constraint will be modified to include them. +type Complex interface { + ~complex64 | ~complex128 +} + +// Ordered is a constraint that permits any ordered type: any type +// that supports the operators < <= >= >. +// If future releases of Go add new ordered types, +// this constraint will be modified to include them. +type Ordered interface { + Integer | Float | ~string +} diff --git a/vendor/golang.org/x/time/LICENSE b/vendor/golang.org/x/time/LICENSE index 6a66aea5..2a7cf70d 100644 --- a/vendor/golang.org/x/time/LICENSE +++ b/vendor/golang.org/x/time/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/time/rate/rate.go b/vendor/golang.org/x/time/rate/rate.go index 8f6c7f49..794b2e32 100644 --- a/vendor/golang.org/x/time/rate/rate.go +++ b/vendor/golang.org/x/time/rate/rate.go @@ -85,7 +85,7 @@ func (lim *Limiter) Burst() int { // TokensAt returns the number of tokens available at time t. func (lim *Limiter) TokensAt(t time.Time) float64 { lim.mu.Lock() - _, tokens := lim.advance(t) // does not mutate lim + tokens := lim.advance(t) // does not mutate lim lim.mu.Unlock() return tokens } @@ -99,8 +99,9 @@ func (lim *Limiter) Tokens() float64 { // bursts of at most b tokens. 
func NewLimiter(r Limit, b int) *Limiter { return &Limiter{ - limit: r, - burst: b, + limit: r, + burst: b, + tokens: float64(b), } } @@ -185,7 +186,7 @@ func (r *Reservation) CancelAt(t time.Time) { return } // advance time to now - t, tokens := r.lim.advance(t) + tokens := r.lim.advance(t) // calculate new number of tokens tokens += restoreTokens if burst := float64(r.lim.burst); tokens > burst { @@ -306,7 +307,7 @@ func (lim *Limiter) SetLimitAt(t time.Time, newLimit Limit) { lim.mu.Lock() defer lim.mu.Unlock() - t, tokens := lim.advance(t) + tokens := lim.advance(t) lim.last = t lim.tokens = tokens @@ -323,7 +324,7 @@ func (lim *Limiter) SetBurstAt(t time.Time, newBurst int) { lim.mu.Lock() defer lim.mu.Unlock() - t, tokens := lim.advance(t) + tokens := lim.advance(t) lim.last = t lim.tokens = tokens @@ -344,21 +345,9 @@ func (lim *Limiter) reserveN(t time.Time, n int, maxFutureReserve time.Duration) tokens: n, timeToAct: t, } - } else if lim.limit == 0 { - var ok bool - if lim.burst >= n { - ok = true - lim.burst -= n - } - return Reservation{ - ok: ok, - lim: lim, - tokens: lim.burst, - timeToAct: t, - } } - t, tokens := lim.advance(t) + tokens := lim.advance(t) // Calculate the remaining number of tokens resulting from the request. tokens -= float64(n) @@ -391,10 +380,11 @@ func (lim *Limiter) reserveN(t time.Time, n int, maxFutureReserve time.Duration) return r } -// advance calculates and returns an updated state for lim resulting from the passage of time. +// advance calculates and returns an updated number of tokens for lim +// resulting from the passage of time. // lim is not changed. // advance requires that lim.mu is held. 
-func (lim *Limiter) advance(t time.Time) (newT time.Time, newTokens float64) { +func (lim *Limiter) advance(t time.Time) (newTokens float64) { last := lim.last if t.Before(last) { last = t @@ -407,7 +397,7 @@ func (lim *Limiter) advance(t time.Time) (newT time.Time, newTokens float64) { if burst := float64(lim.burst); tokens > burst { tokens = burst } - return t, tokens + return tokens } // durationFromTokens is a unit conversion function from the number of tokens to the duration @@ -416,8 +406,15 @@ func (limit Limit) durationFromTokens(tokens float64) time.Duration { if limit <= 0 { return InfDuration } - seconds := tokens / float64(limit) - return time.Duration(float64(time.Second) * seconds) + + duration := (tokens / float64(limit)) * float64(time.Second) + + // Cap the duration to the maximum representable int64 value, to avoid overflow. + if duration > float64(math.MaxInt64) { + return InfDuration + } + + return time.Duration(duration) } // tokensFromDuration is a unit conversion function from a time duration to the number of tokens diff --git a/vendor/golang.org/x/time/rate/sometimes.go b/vendor/golang.org/x/time/rate/sometimes.go index 6ba99ddb..9b839326 100644 --- a/vendor/golang.org/x/time/rate/sometimes.go +++ b/vendor/golang.org/x/time/rate/sometimes.go @@ -61,7 +61,9 @@ func (s *Sometimes) Do(f func()) { (s.Every > 0 && s.count%s.Every == 0) || (s.Interval > 0 && time.Since(s.last) >= s.Interval) { f() - s.last = time.Now() + if s.Interval > 0 { + s.last = time.Now() + } } s.count++ } diff --git a/vendor/gvisor.dev/gvisor/LICENSE b/vendor/gvisor.dev/gvisor/LICENSE index 74fddbbd..f7a006d1 100644 --- a/vendor/gvisor.dev/gvisor/LICENSE +++ b/vendor/gvisor.dev/gvisor/LICENSE @@ -221,4 +221,34 @@ Some files carry the following license, noted at the top of each file: AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. \ No newline at end of file + THE SOFTWARE. + +------------------ + +Some files carry the "BSD" license, noted at the top of each file: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Google Inc. nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/vendor/gvisor.dev/gvisor/pkg/bits/bits32.go b/vendor/gvisor.dev/gvisor/pkg/bits/bits32.go deleted file mode 100644 index 28134a9e..00000000 --- a/vendor/gvisor.dev/gvisor/pkg/bits/bits32.go +++ /dev/null @@ -1,33 +0,0 @@ -package bits - -// IsOn returns true if *all* bits set in 'bits' are set in 'mask'. 
-func IsOn32(mask, bits uint32) bool { - return mask&bits == bits -} - -// IsAnyOn returns true if *any* bit set in 'bits' is set in 'mask'. -func IsAnyOn32(mask, bits uint32) bool { - return mask&bits != 0 -} - -// Mask returns a T with all of the given bits set. -func Mask32(is ...int) uint32 { - ret := uint32(0) - for _, i := range is { - ret |= MaskOf32(i) - } - return ret -} - -// MaskOf is like Mask, but sets only a single bit (more efficiently). -func MaskOf32(i int) uint32 { - return uint32(1) << uint32(i) -} - -// IsPowerOfTwo returns true if v is power of 2. -func IsPowerOfTwo32(v uint32) bool { - if v == 0 { - return false - } - return v&(v-1) == 0 -} diff --git a/vendor/gvisor.dev/gvisor/pkg/bits/bits64.go b/vendor/gvisor.dev/gvisor/pkg/bits/bits64.go deleted file mode 100644 index 73117b19..00000000 --- a/vendor/gvisor.dev/gvisor/pkg/bits/bits64.go +++ /dev/null @@ -1,33 +0,0 @@ -package bits - -// IsOn returns true if *all* bits set in 'bits' are set in 'mask'. -func IsOn64(mask, bits uint64) bool { - return mask&bits == bits -} - -// IsAnyOn returns true if *any* bit set in 'bits' is set in 'mask'. -func IsAnyOn64(mask, bits uint64) bool { - return mask&bits != 0 -} - -// Mask returns a T with all of the given bits set. -func Mask64(is ...int) uint64 { - ret := uint64(0) - for _, i := range is { - ret |= MaskOf64(i) - } - return ret -} - -// MaskOf is like Mask, but sets only a single bit (more efficiently). -func MaskOf64(i int) uint64 { - return uint64(1) << uint64(i) -} - -// IsPowerOfTwo returns true if v is power of 2. -func IsPowerOfTwo64(v uint64) bool { - if v == 0 { - return false - } - return v&(v-1) == 0 -} diff --git a/vendor/gvisor.dev/gvisor/pkg/bits/bits_template.go b/vendor/gvisor.dev/gvisor/pkg/bits/bits_template.go new file mode 100644 index 00000000..0d65312f --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/bits/bits_template.go @@ -0,0 +1,51 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bits + +import "golang.org/x/exp/constraints" + +// Non-atomic bit operations on integral types. + +// IsOn returns true if *all* bits set in 'bits' are set in 'mask'. +func IsOn[T constraints.Integer](mask, bits T) bool { + return mask&bits == bits +} + +// IsAnyOn returns true if *any* bit set in 'bits' is set in 'mask'. +func IsAnyOn[T constraints.Integer](mask, bits T) bool { + return mask&bits != 0 +} + +// Mask returns a T with all of the given bits set. +func Mask[T constraints.Integer](is ...int) T { + ret := T(0) + for _, i := range is { + ret |= MaskOf[T](i) + } + return ret +} + +// MaskOf is like Mask, but sets only a single bit (more efficiently). +func MaskOf[T constraints.Integer](i int) T { + return T(1) << T(i) +} + +// IsPowerOfTwo returns true if v is power of 2. 
+func IsPowerOfTwo[T constraints.Integer](v T) bool { + if v == 0 { + return false + } + return v&(v-1) == 0 +} diff --git a/vendor/gvisor.dev/gvisor/pkg/bits/uint64_arch.go b/vendor/gvisor.dev/gvisor/pkg/bits/uint64_arch.go index fc563416..c31dc60c 100644 --- a/vendor/gvisor.dev/gvisor/pkg/bits/uint64_arch.go +++ b/vendor/gvisor.dev/gvisor/pkg/bits/uint64_arch.go @@ -32,6 +32,6 @@ func ForEachSetBit64(x uint64, f func(i int)) { for x != 0 { i := TrailingZeros64(x) f(i) - x &^= MaskOf64(i) + x &^= MaskOf[uint64](i) } } diff --git a/vendor/gvisor.dev/gvisor/pkg/buffer/buffer.go b/vendor/gvisor.dev/gvisor/pkg/buffer/buffer.go index 3e6bc6dd..0f352c0d 100644 --- a/vendor/gvisor.dev/gvisor/pkg/buffer/buffer.go +++ b/vendor/gvisor.dev/gvisor/pkg/buffer/buffer.go @@ -318,8 +318,11 @@ func (b *Buffer) PullUp(offset, length int) (View, bool) { if x := curr.Intersect(tgt); x.Len() == tgt.Len() { // buf covers the whole requested target range. sub := x.Offset(-curr.begin) - // Don't increment the reference count of the underlying chunk. 
Views - // returned by PullUp are explicitly unowned and read only + if v.sharesChunk() { + old := v.chunk + v.chunk = v.chunk.Clone() + old.DecRef() + } new := View{ read: v.read + sub.begin, write: v.read + sub.end, diff --git a/vendor/gvisor.dev/gvisor/pkg/buffer/buffer_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/buffer/buffer_state_autogen.go index 3e32338f..91a0c0a9 100644 --- a/vendor/gvisor.dev/gvisor/pkg/buffer/buffer_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/buffer/buffer_state_autogen.go @@ -24,8 +24,8 @@ func (b *Buffer) beforeSave() {} // +checklocksignore func (b *Buffer) StateSave(stateSinkObject state.Sink) { b.beforeSave() - var dataValue []byte - dataValue = b.saveData() + dataValue := b.saveData() + _ = ([]byte)(dataValue) stateSinkObject.SaveValue(0, dataValue) stateSinkObject.Save(1, &b.size) } diff --git a/vendor/gvisor.dev/gvisor/pkg/buffer/view_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/buffer/view_unsafe.go index cef7e7ed..b03fec95 100644 --- a/vendor/gvisor.dev/gvisor/pkg/buffer/view_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/buffer/view_unsafe.go @@ -15,12 +15,10 @@ package buffer import ( - "reflect" "unsafe" ) // BasePtr returns a pointer to the view's chunk. func (v *View) BasePtr() *byte { - hdr := (*reflect.SliceHeader)(unsafe.Pointer(&v.chunk.data)) - return (*byte)(unsafe.Pointer(hdr.Data)) + return unsafe.SliceData(v.chunk.data) } diff --git a/vendor/gvisor.dev/gvisor/pkg/context/context.go b/vendor/gvisor.dev/gvisor/pkg/context/context.go index 7f94da24..7af96282 100644 --- a/vendor/gvisor.dev/gvisor/pkg/context/context.go +++ b/vendor/gvisor.dev/gvisor/pkg/context/context.go @@ -43,6 +43,9 @@ type Blocker interface { // Interrupted notes whether this context is Interrupted. Interrupted() bool + // Killed returns true if this context is interrupted by a fatal signal. + Killed() bool + // BlockOn blocks until one of the previously registered events occurs, // or some external interrupt (cancellation). 
// @@ -55,25 +58,32 @@ type Blocker interface { // is interrupted. Block(C <-chan struct{}) error + // BlockWithTimeout blocks until an event is received from C, the timeout + // has elapsed (only if haveTimeout is true), or some external interrupt. + // + // It returns: + // - The remaining timeout, which is guaranteed to be 0 if the timeout + // expired, and is unspecified if haveTimeout is false. + // - An error which if the timeout expired or if interrupted. + BlockWithTimeout(C chan struct{}, haveTimeout bool, timeout time.Duration) (time.Duration, error) + // BlockWithTimeoutOn blocks until either the conditions of Block are // satisfied, or the timeout is hit. Note that deadlines are not supported // since the notion of "with respect to what clock" is not resolved. // - // The return value is per BlockOn. + // It returns: + // - The remaining timeout, which is guaranteed to be 0 if the timeout + // expired. + // - Boolean as per BlockOn return value. BlockWithTimeoutOn(waiter.Waitable, waiter.EventMask, time.Duration) (time.Duration, bool) // UninterruptibleSleepStart indicates the beginning of an uninterruptible - // sleep state (equivalent to Linux's TASK_UNINTERRUPTIBLE). If deactivate - // is true and the Context represents a Task, the Task's AddressSpace is - // deactivated. - UninterruptibleSleepStart(deactivate bool) + // sleep state (equivalent to Linux's TASK_UNINTERRUPTIBLE). + UninterruptibleSleepStart() // UninterruptibleSleepFinish indicates the end of an uninterruptible sleep - // state that was begun by a previous call to UninterruptibleSleepStart. If - // activate is true and the Context represents a Task, the Task's - // AddressSpace is activated. Normally activate is the same value as the - // deactivate parameter passed to UninterruptibleSleepStart. - UninterruptibleSleepFinish(activate bool) + // state that was begun by a previous call to UninterruptibleSleepStart. 
+ UninterruptibleSleepFinish() } // NoTask is an implementation of Blocker that does not block. @@ -91,7 +101,12 @@ func (nt *NoTask) Interrupt() { // Interrupted implements Blocker.Interrupted. func (nt *NoTask) Interrupted() bool { - return nt.cancel != nil && len(nt.cancel) > 0 + return len(nt.cancel) > 0 +} + +// Killed implements Blocker.Killed. +func (nt *NoTask) Killed() bool { + return false } // Block implements Blocker.Block. @@ -123,34 +138,47 @@ func (nt *NoTask) BlockOn(w waiter.Waitable, mask waiter.EventMask) bool { } } -// BlockWithTimeoutOn implements Blocker.BlockWithTimeoutOn. -func (nt *NoTask) BlockWithTimeoutOn(w waiter.Waitable, mask waiter.EventMask, duration time.Duration) (time.Duration, bool) { +// BlockWithTimeout implements Blocker.BlockWithTimeout. +func (nt *NoTask) BlockWithTimeout(C chan struct{}, haveTimeout bool, timeout time.Duration) (time.Duration, error) { + if !haveTimeout { + return timeout, nt.Block(C) + } + if nt.cancel == nil { nt.cancel = make(chan struct{}, 1) } - e, ch := waiter.NewChannelEntry(mask) - w.EventRegister(&e) - defer w.EventUnregister(&e) start := time.Now() // In system time. - t := time.AfterFunc(duration, func() { ch <- struct{}{} }) + remainingTimeout := func() time.Duration { + rt := timeout - time.Since(start) + if rt < 0 { + rt = 0 + } + return rt + } select { case <-nt.cancel: - return time.Since(start), false // Interrupted. - case _, ok := <-ch: - if ok && t.Stop() { - // Timer never fired. - return time.Since(start), ok - } - // Timer fired, remain is zero. - return time.Duration(0), ok + return remainingTimeout(), errors.New("interrupted system call") // Interrupted. + case <-C: + return remainingTimeout(), nil + case <-time.After(timeout): + return 0, errors.New("timeout expired") } } +// BlockWithTimeoutOn implements Blocker.BlockWithTimeoutOn. 
+func (nt *NoTask) BlockWithTimeoutOn(w waiter.Waitable, mask waiter.EventMask, timeout time.Duration) (time.Duration, bool) { + e, ch := waiter.NewChannelEntry(mask) + w.EventRegister(&e) + defer w.EventUnregister(&e) + left, err := nt.BlockWithTimeout(ch, true, timeout) + return left, err == nil +} + // UninterruptibleSleepStart implmenents Blocker.UninterruptedSleepStart. -func (*NoTask) UninterruptibleSleepStart(bool) {} +func (*NoTask) UninterruptibleSleepStart() {} // UninterruptibleSleepFinish implmenents Blocker.UninterruptibleSleepFinish. -func (*NoTask) UninterruptibleSleepFinish(bool) {} +func (*NoTask) UninterruptibleSleepFinish() {} // Context represents a thread of execution (hereafter "goroutine" to reflect // Go idiosyncrasy). It carries state associated with the goroutine across API diff --git a/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid.go b/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid.go index df5acf67..e3a62348 100644 --- a/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid.go +++ b/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid.go @@ -233,9 +233,10 @@ func readHWCap(auxvFilepath string) (hwCap, error) { for i := 0; i < l; i++ { tag := binary.LittleEndian.Uint64(auxv[i*16:]) val := binary.LittleEndian.Uint64(auxv[i*16+8:]) - if tag == _AT_HWCAP { + switch tag { + case _AT_HWCAP: c.hwCap1 = val - } else if tag == _AT_HWCAP2 { + case _AT_HWCAP2: c.hwCap2 = val } @@ -249,7 +250,7 @@ func readHWCap(auxvFilepath string) (hwCap, error) { func initHWCap() { c, err := readHWCap("/proc/self/auxv") if err != nil { - log.Warningf("cpuid HWCap not initialized: %w", err) + log.Warningf("cpuid HWCap not initialized: %v", err) } else { hostFeatureSet.hwCap = c } diff --git a/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_amd64.go b/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_amd64.go index 829e089e..0f7e1d0b 100644 --- a/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_amd64.go +++ b/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_amd64.go @@ -480,3 +480,17 @@ func (fs FeatureSet) 
archCheckHostCompatible(hfs FeatureSet) error { return nil } + +// AllowedHWCap1 returns the HWCAP1 bits that the guest is allowed to depend +// on. +func (fs FeatureSet) AllowedHWCap1() uint64 { + // HWCAPS are not supported on amd64. + return 0 +} + +// AllowedHWCap2 returns the HWCAP2 bits that the guest is allowed to depend +// on. +func (fs FeatureSet) AllowedHWCap2() uint64 { + // HWCAPS are not supported on amd64. + return 0 +} diff --git a/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_amd64_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_amd64_state_autogen.go index bb416970..77dc2c96 100644 --- a/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_amd64_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_amd64_state_autogen.go @@ -1,7 +1,7 @@ // automatically generated by stateify. -//go:build amd64 && amd64 && amd64 && amd64 -// +build amd64,amd64,amd64,amd64 +//go:build amd64 && amd64 && amd64 && amd64 && amd64 +// +build amd64,amd64,amd64,amd64,amd64 package cpuid @@ -27,8 +27,8 @@ func (fs *FeatureSet) beforeSave() {} // +checklocksignore func (fs *FeatureSet) StateSave(stateSinkObject state.Sink) { fs.beforeSave() - var FunctionValue Static - FunctionValue = fs.saveFunction() + FunctionValue := fs.saveFunction() + _ = (Static)(FunctionValue) stateSinkObject.SaveValue(0, FunctionValue) stateSinkObject.Save(1, &fs.hwCap) } diff --git a/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_arm64.go b/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_arm64.go index 964f33ac..3072a37a 100644 --- a/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_arm64.go +++ b/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_arm64.go @@ -108,3 +108,47 @@ func (fs FeatureSet) WriteCPUInfoTo(cpu, numCPU uint, w io.Writer) { func (FeatureSet) archCheckHostCompatible(FeatureSet) error { return nil } + +// AllowedHWCap1 returns the HWCAP1 bits that the guest is allowed to depend +// on. +func (fs FeatureSet) AllowedHWCap1() uint64 { + // Pick a set of safe HWCAPS to expose. 
These do not rely on cpu state + // that gvisor does not restore after a context switch. + allowed := HWCAP_AES | + HWCAP_ASIMD | + HWCAP_ASIMDDP | + HWCAP_ASIMDFHM | + HWCAP_ASIMDHP | + HWCAP_ASIMDRDM | + HWCAP_ATOMICS | + HWCAP_CRC32 | + HWCAP_DCPOP | + HWCAP_DIT | + HWCAP_EVTSTRM | + HWCAP_FCMA | + HWCAP_FLAGM | + HWCAP_FP | + HWCAP_FPHP | + HWCAP_ILRCPC | + HWCAP_JSCVT | + HWCAP_LRCPC | + HWCAP_PMULL | + HWCAP_SHA1 | + HWCAP_SHA2 | + HWCAP_SHA3 | + HWCAP_SHA512 | + HWCAP_SM3 | + HWCAP_SM4 | + HWCAP_USCAT + return fs.hwCap.hwCap1 & uint64(allowed) +} + +// AllowedHWCap2 returns the HWCAP2 bits that the guest is allowed to depend +// on. +func (fs FeatureSet) AllowedHWCap2() uint64 { + // We don't expose anything here yet, but this could be expanded to + // include features do not rely on cpu state that is not restored after + // a context switch. + allowed := 0 + return fs.hwCap.hwCap2 & uint64(allowed) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_arm64_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_arm64_state_autogen.go index 1d7f9334..fa9ce009 100644 --- a/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_arm64_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/cpuid/cpuid_arm64_state_autogen.go @@ -1,7 +1,7 @@ // automatically generated by stateify. -//go:build arm64 && arm64 && arm64 -// +build arm64,arm64,arm64 +//go:build arm64 && arm64 && arm64 && arm64 +// +build arm64,arm64,arm64,arm64 package cpuid diff --git a/vendor/gvisor.dev/gvisor/pkg/goid/goid_122_amd64.s b/vendor/gvisor.dev/gvisor/pkg/cpuid/hwcap_amd64.go similarity index 67% rename from vendor/gvisor.dev/gvisor/pkg/goid/goid_122_amd64.s rename to vendor/gvisor.dev/gvisor/pkg/cpuid/hwcap_amd64.go index 5039f73f..f75a0a6f 100644 --- a/vendor/gvisor.dev/gvisor/pkg/goid/goid_122_amd64.s +++ b/vendor/gvisor.dev/gvisor/pkg/cpuid/hwcap_amd64.go @@ -1,4 +1,4 @@ -// Copyright 2020 The gVisor Authors. +// Copyright 2024 The gVisor Authors. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,15 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !go1.23 +//go:build amd64 +// +build amd64 -#include "textflag.h" +package cpuid -#define GOID_OFFSET 152 // +checkoffset runtime g.goid - -// func goid() int64 -TEXT ·goid(SB),NOSPLIT|NOFRAME,$0-8 - MOVQ (TLS), R14 - MOVQ GOID_OFFSET(R14), R14 - MOVQ R14, ret+0(FP) - RET +// See arch/x86/include/uapi/asm/hwcap2.h +const ( + HWCAP2_RING3MWAIT = 1 << 0 + HWCAP2_FSGSBASE = 1 << 1 +) diff --git a/vendor/gvisor.dev/gvisor/pkg/cpuid/hwcap_arm64.go b/vendor/gvisor.dev/gvisor/pkg/cpuid/hwcap_arm64.go new file mode 100644 index 00000000..8b85bf9b --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/cpuid/hwcap_arm64.go @@ -0,0 +1,79 @@ +// Copyright 2024 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build arm64 +// +build arm64 + +package cpuid + +// See arch/arm64/include/uapi/asm/hwcap.h +const ( + // HWCAP flags for AT_HWCAP. 
+ HWCAP_FP = 1 << 0 + HWCAP_ASIMD = 1 << 1 + HWCAP_EVTSTRM = 1 << 2 + HWCAP_AES = 1 << 3 + HWCAP_PMULL = 1 << 4 + HWCAP_SHA1 = 1 << 5 + HWCAP_SHA2 = 1 << 6 + HWCAP_CRC32 = 1 << 7 + HWCAP_ATOMICS = 1 << 8 + HWCAP_FPHP = 1 << 9 + HWCAP_ASIMDHP = 1 << 10 + HWCAP_CPUID = 1 << 11 + HWCAP_ASIMDRDM = 1 << 12 + HWCAP_JSCVT = 1 << 13 + HWCAP_FCMA = 1 << 14 + HWCAP_LRCPC = 1 << 15 + HWCAP_DCPOP = 1 << 16 + HWCAP_SHA3 = 1 << 17 + HWCAP_SM3 = 1 << 18 + HWCAP_SM4 = 1 << 19 + HWCAP_ASIMDDP = 1 << 20 + HWCAP_SHA512 = 1 << 21 + HWCAP_SVE = 1 << 22 + HWCAP_ASIMDFHM = 1 << 23 + HWCAP_DIT = 1 << 24 + HWCAP_USCAT = 1 << 25 + HWCAP_ILRCPC = 1 << 26 + HWCAP_FLAGM = 1 << 27 + HWCAP_SSBS = 1 << 28 + HWCAP_SB = 1 << 29 + HWCAP_PACA = 1 << 30 + HWCAP_PACG = 1 << 31 + + // HWCAP2 flags for AT_HWCAP2. + HWCAP2_DCPODP = 1 << 0 + HWCAP2_SVE2 = 1 << 1 + HWCAP2_SVEAES = 1 << 2 + HWCAP2_SVEPMULL = 1 << 3 + HWCAP2_SVEBITPERM = 1 << 4 + HWCAP2_SVESHA3 = 1 << 5 + HWCAP2_SVESM4 = 1 << 6 + HWCAP2_FLAGM2 = 1 << 7 + HWCAP2_FRINT = 1 << 8 + HWCAP2_SVEI8MM = 1 << 9 + HWCAP2_SVEF32MM = 1 << 10 + HWCAP2_SVEF64MM = 1 << 11 + HWCAP2_SVEBF16 = 1 << 12 + HWCAP2_I8MM = 1 << 13 + HWCAP2_BF16 = 1 << 14 + HWCAP2_DGH = 1 << 15 + HWCAP2_RNG = 1 << 16 + HWCAP2_BTI = 1 << 17 + HWCAP2_MTE = 1 << 18 + HWCAP2_ECV = 1 << 19 + HWCAP2_AFP = 1 << 20 + HWCAP2_RPRES = 1 << 21 +) diff --git a/vendor/gvisor.dev/gvisor/pkg/cpuid/native_amd64.go b/vendor/gvisor.dev/gvisor/pkg/cpuid/native_amd64.go index ac2fcbbc..cc4ca313 100644 --- a/vendor/gvisor.dev/gvisor/pkg/cpuid/native_amd64.go +++ b/vendor/gvisor.dev/gvisor/pkg/cpuid/native_amd64.go @@ -18,9 +18,10 @@ package cpuid import ( - "io/ioutil" + "bufio" + "bytes" + "os" "strconv" - "strings" "gvisor.dev/gvisor/pkg/log" ) @@ -162,6 +163,23 @@ func (fs FeatureSet) query(fn cpuidFunction) (uint32, uint32, uint32, uint32) { return out.Eax, out.Ebx, out.Ecx, out.Edx } +// Intersect returns the intersection of features between self and allowedFeatures. 
+func (fs FeatureSet) Intersect(allowedFeatures map[Feature]struct{}) (FeatureSet, error) { + hs := fs.ToStatic() + + // only keep features inside allowedFeatures. + for f := range allFeatures { + if fs.HasFeature(f) { + if _, ok := allowedFeatures[f]; !ok { + log.Infof("Removing CPU feature %v as it is not allowed.", f) + hs.Remove(f) + } + } + } + + return hs.ToFeatureSet(), nil +} + var hostFeatureSet FeatureSet // HostFeatureSet returns a host CPUID. @@ -180,39 +198,44 @@ var ( // filter installation. This value is used to create the fake /proc/cpuinfo // from a FeatureSet. func readMaxCPUFreq() { - cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo") + cpuinfoFile, err := os.Open("/proc/cpuinfo") if err != nil { // Leave it as 0... the VDSO bails out in the same way. - log.Warningf("Could not read /proc/cpuinfo: %v", err) + log.Warningf("Could not open /proc/cpuinfo: %v", err) return } - cpuinfo := string(cpuinfob) + defer cpuinfoFile.Close() // We get the value straight from host /proc/cpuinfo. On machines with // frequency scaling enabled, this will only get the current value // which will likely be inaccurate. This is fine on machines with // frequency scaling disabled. - for _, line := range strings.Split(cpuinfo, "\n") { - if strings.Contains(line, "cpu MHz") { - splitMHz := strings.Split(line, ":") + s := bufio.NewScanner(cpuinfoFile) + for s.Scan() { + line := s.Bytes() + if bytes.Contains(line, []byte("cpu MHz")) { + splitMHz := bytes.Split(line, []byte(":")) if len(splitMHz) < 2 { - log.Warningf("Could not read /proc/cpuinfo: malformed cpu MHz line") + log.Warningf("Could not parse /proc/cpuinfo: malformed cpu MHz line: %q", line) return } - // If there was a problem, leave cpuFreqMHz as 0. 
var err error - cpuFreqMHz, err = strconv.ParseFloat(strings.TrimSpace(splitMHz[1]), 64) + splitMHzStr := string(bytes.TrimSpace(splitMHz[1])) + f64MHz, err := strconv.ParseFloat(splitMHzStr, 64) if err != nil { - log.Warningf("Could not parse cpu MHz value %v: %v", splitMHz[1], err) - cpuFreqMHz = 0 + log.Warningf("Could not parse cpu MHz value %q: %v", splitMHzStr, err) return } + cpuFreqMHz = f64MHz return } } + if err := s.Err(); err != nil { + log.Warningf("Could not read /proc/cpuinfo: %v", err) + return + } log.Warningf("Could not parse /proc/cpuinfo, it is empty or does not contain cpu MHz") - } // xgetbv reads an extended control register. diff --git a/vendor/gvisor.dev/gvisor/pkg/cpuid/native_arm64.go b/vendor/gvisor.dev/gvisor/pkg/cpuid/native_arm64.go index f09edcec..eeb6d149 100644 --- a/vendor/gvisor.dev/gvisor/pkg/cpuid/native_arm64.go +++ b/vendor/gvisor.dev/gvisor/pkg/cpuid/native_arm64.go @@ -18,7 +18,8 @@ package cpuid import ( - "io/ioutil" + "fmt" + "os" "runtime" "strconv" "strings" @@ -41,6 +42,13 @@ func (fs FeatureSet) Fixed() FeatureSet { return fs } +// Intersect returns the intersection of features between self and allowedFeatures. +// +// Just return error as there is no ARM64 equivalent to cpuid.Static.Remove(). +func (fs FeatureSet) Intersect(allowedFeatures map[Feature]struct{}) (FeatureSet, error) { + return FeatureSet{}, fmt.Errorf("FeatureSet intersection is not supported on ARM64") +} + // Reads CPU information from host /proc/cpuinfo. // // Must run before syscall filter installation. This value is used to create @@ -51,7 +59,7 @@ func initCPUInfo() { // warn about them not existing. return } - cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo") + cpuinfob, err := os.ReadFile("/proc/cpuinfo") if err != nil { // Leave everything at 0, nothing can be done. 
log.Warningf("Could not read /proc/cpuinfo: %v", err) diff --git a/vendor/gvisor.dev/gvisor/pkg/cpuid/static_amd64.go b/vendor/gvisor.dev/gvisor/pkg/cpuid/static_amd64.go index f21f2e4f..6012169f 100644 --- a/vendor/gvisor.dev/gvisor/pkg/cpuid/static_amd64.go +++ b/vendor/gvisor.dev/gvisor/pkg/cpuid/static_amd64.go @@ -26,7 +26,9 @@ type Static map[In]Out // Fixed converts the FeatureSet to a fixed set. func (fs FeatureSet) Fixed() FeatureSet { - return fs.ToStatic().ToFeatureSet() + sfs := fs.ToStatic().ToFeatureSet() + sfs.hwCap = fs.hwCap + return sfs } // ToStatic converts a FeatureSet to a Static function. @@ -103,8 +105,8 @@ func (s Static) normalize() { if fs.HasFeature(X86FeatureXSAVE) { in := In{Eax: uint32(xSaveInfo)} out := s[in] - out.Ecx = maxXsaveSize - out.Ebx = xsaveSize + out.Ecx = max(out.Ecx, maxXsaveSize) + out.Ebx = max(out.Ebx, xsaveSize) s[in] = out } } diff --git a/vendor/gvisor.dev/gvisor/pkg/goid/goid_122_arm64.s b/vendor/gvisor.dev/gvisor/pkg/goid/goid_122_arm64.s deleted file mode 100644 index ec59b4be..00000000 --- a/vendor/gvisor.dev/gvisor/pkg/goid/goid_122_arm64.s +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build !go1.23 - -#include "textflag.h" - -#define GOID_OFFSET 152 // +checkoffset runtime g.goid - -// func goid() int64 -TEXT ·goid(SB),NOSPLIT,$0-8 - MOVD g, R0 // g - MOVD GOID_OFFSET(R0), R0 - MOVD R0, ret+0(FP) - RET diff --git a/vendor/gvisor.dev/gvisor/pkg/goid/goid_123_amd64.s b/vendor/gvisor.dev/gvisor/pkg/goid/goid_asm_impl_amd64.s similarity index 91% rename from vendor/gvisor.dev/gvisor/pkg/goid/goid_123_amd64.s rename to vendor/gvisor.dev/gvisor/pkg/goid/goid_asm_impl_amd64.s index 9f53a4e9..10878714 100644 --- a/vendor/gvisor.dev/gvisor/pkg/goid/goid_123_amd64.s +++ b/vendor/gvisor.dev/gvisor/pkg/goid/goid_asm_impl_amd64.s @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build go1.23 +//go:build amd64 #include "textflag.h" -#define GOID_OFFSET 160 // +checkoffset runtime g.goid +#define GOID_OFFSET 152 // func goid() int64 TEXT ·goid(SB),NOSPLIT|NOFRAME,$0-8 diff --git a/vendor/gvisor.dev/gvisor/pkg/goid/goid_123_arm64.s b/vendor/gvisor.dev/gvisor/pkg/goid/goid_asm_impl_arm64.s similarity index 91% rename from vendor/gvisor.dev/gvisor/pkg/goid/goid_123_arm64.s rename to vendor/gvisor.dev/gvisor/pkg/goid/goid_asm_impl_arm64.s index 08d70578..9bd8bed6 100644 --- a/vendor/gvisor.dev/gvisor/pkg/goid/goid_123_arm64.s +++ b/vendor/gvisor.dev/gvisor/pkg/goid/goid_asm_impl_arm64.s @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build go1.23 +//go:build arm64 #include "textflag.h" -#define GOID_OFFSET 160 // +checkoffset runtime g.goid +#define GOID_OFFSET 152 // func goid() int64 TEXT ·goid(SB),NOSPLIT,$0-8 diff --git a/vendor/gvisor.dev/gvisor/pkg/log/bug.go b/vendor/gvisor.dev/gvisor/pkg/log/bug.go new file mode 100644 index 00000000..7e633b4d --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/log/bug.go @@ -0,0 +1,161 @@ +// Copyright 2025 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package log + +import ( + "fmt" + "runtime" + "strings" + + "gvisor.dev/gvisor/pkg/sync" +) + +// This file contains helper functions analogous to the Linux kernel's WARN* +// macros. Should be used for non-fatal errors that should be treated as bugs +// none the less. + +const ( + warnFmtStr = "WARNING: BUG on %s:%d\n" + warnUnknownLineStr = "WARNING: BUG on unknown line\n" + catchAllMagic = "runtime.Caller failed" +) + +//go:noinline +func reportBugErr(caller int, err error) { + reportBug(caller+1, err.Error(), nil) +} + +func reportBug(caller int, msg string, vars []any) { + var b strings.Builder + if _, file, line, ok := runtime.Caller(caller); ok { + fmt.Fprintf(&b, warnFmtStr, file, line) + } else { + b.WriteString(warnUnknownLineStr) + } + b.WriteByte('\n') + if len(msg) > 0 { + if len(vars) > 0 { + fmt.Fprintf(&b, msg, vars...) + } else { + b.WriteString(msg) + } + b.WriteByte('\n') + } + TracebackAll(b.String()) +} + +var ( + // warnedMu protects the variables below. + warnedMu sync.Mutex + // warnedSet is used to keep track of which WarnOnOnce calls have fired. 
+ warnedSet map[string]struct{} +) + +//go:noinline +func reportBugErrOnce(caller int, err error) { + reportBugOnce(caller+1, err.Error(), nil) +} + +func reportBugOnce(caller int, msg string, vars []any) { + var b strings.Builder + if _, file, line, ok := runtime.Caller(caller); ok { + key := fmt.Sprintf("%s:%d", file, line) + + warnedMu.Lock() + defer warnedMu.Unlock() + + if _, ok = warnedSet[key]; !ok { + fmt.Fprintf(&b, warnFmtStr, file, line) + b.WriteByte('\n') + if len(msg) > 0 { + if len(vars) > 0 { + fmt.Fprintf(&b, msg, vars...) + } else { + b.WriteString(msg) + } + b.WriteByte('\n') + } + + TracebackAll(b.String()) + warnedSet[key] = struct{}{} + } + } else { + warnedMu.Lock() + defer warnedMu.Unlock() + + // Use const string as a catch-all when runtime.Caller fails, + // so as to avoid log-spam since that's the point of WARN_ONCE. + if _, ok := warnedSet[catchAllMagic]; !ok { + b.WriteString(warnUnknownLineStr) + b.WriteByte('\n') + if len(msg) > 0 { + if len(vars) > 0 { + fmt.Fprintf(&b, msg, vars...) + } else { + b.WriteString(msg) + } + b.WriteByte('\n') + } + + TracebackAll(b.String()) + warnedSet[catchAllMagic] = struct{}{} + } + } +} + +// BugTraceback will report a bug with a traceback of all goroutines if the +// error isn't nil. Use it for reporting abnormal bugs encountered at runtime +// that should be fixed. +// +// Do not use this for bad user input. Errors reported by this function should +// not be fatal. +func BugTraceback(err error) { + if err != nil { + reportBugErr(2, err) + } +} + +// BugTracebackf will report a bug with a traceback of all goroutines. +// Use it for reporting abnormal bugs encountered at runtime that should be +// fixed. +// +// Do not use this for bad user input. Errors reported by this function should +// not be fatal. +func BugTracebackf(s string, a ...any) { + reportBug(2, s, a) +} + +// BugTracebackOnce will report a bug with a traceback of all goroutines if the +// error isn't nil. 
Use it for reporting abnormal bugs encountered at runtime +// that should be fixed. If called multiple time from same invocation, will only +// print once. +// +// Do not use this for bad user input. Errors reported by this function should +// not be fatal. +func BugTracebackOnce(err error) { + if err != nil { + reportBugErrOnce(2, err) + } +} + +// BugTracebackfOnce will report a bug with a traceback of all goroutines. +// Use it for reporting abnormal bugs encountered at runtime that should be +// fixed. If called multiple time from same invocation, will only print once. +// +// Do not use this for bad user input. Errors reported by this function should +// not be fatal. +func BugTracebackfOnce(s string, a ...any) { + reportBugOnce(2, s, a) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/log/file.go b/vendor/gvisor.dev/gvisor/pkg/log/file.go new file mode 100644 index 00000000..15860b68 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/log/file.go @@ -0,0 +1,60 @@ +// Copyright 2026 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package log + +import ( + "fmt" + "os" + "path/filepath" +) + +// FileOpts contains options for creating a log file. +type FileOpts interface { + // Build constructs the log file path based on the given pattern. + Build(logPattern string) string +} + +// DefaultFileOpts is the default implementation of FileOpts which supports no +// variable substitution. 
+type DefaultFileOpts struct{} + +// Build implements FileOpts.Build. +func (f *DefaultFileOpts) Build(logPattern string) string { + return logPattern +} + +// OpenFile opens a log file using the specified flags. It uses `opts` to +// construct the log file path based on the given `logPattern`. +func OpenFile(logPattern string, flags int, opts FileOpts) (*os.File, error) { + if len(logPattern) == 0 { + return nil, nil + } + + // Replace variables in the log pattern. + logPath := opts.Build(logPattern) + + // Create parent directory if it doesn't exist. + dir := filepath.Dir(logPath) + if err := os.MkdirAll(dir, 0755); err != nil { + return nil, fmt.Errorf("error creating dir %q: %v", dir, err) + } + + // Open file with the specified flags. + f, err := os.OpenFile(logPath, flags, 0644) + if err != nil { + return nil, fmt.Errorf("error opening file %q: %v", logPath, err) + } + return f, nil +} diff --git a/vendor/gvisor.dev/gvisor/pkg/log/log.go b/vendor/gvisor.dev/gvisor/pkg/log/log.go index 581aa77c..4f1cb741 100644 --- a/vendor/gvisor.dev/gvisor/pkg/log/log.go +++ b/vendor/gvisor.dev/gvisor/pkg/log/log.go @@ -327,7 +327,7 @@ func Stacks(all bool) []byte { } // stackRegexp matches one level within a stack trace. -var stackRegexp = regexp.MustCompile("(?m)^\\S+\\(.*\\)$\\r?\\n^\\t\\S+:\\d+.*$\\r?\\n") +var stackRegexp = regexp.MustCompile(`(?m)^\S+\(.*\)$\r?\n^\t\S+:\d+.*$\r?\n`) // LocalStack returns the local goroutine stack, excluding the top N entries. // LocalStack's own entry is excluded by default and does not need to be counted in excludeTopN. @@ -396,4 +396,6 @@ func CopyStandardLogTo(l Level) error { func init() { // Store the initial value for the log. 
log.Store(&BasicLogger{Level: Info, Emitter: GoogleEmitter{&Writer{Next: os.Stderr}}}) + + warnedSet = make(map[string]struct{}) } diff --git a/vendor/gvisor.dev/gvisor/pkg/refs/refs_map.go b/vendor/gvisor.dev/gvisor/pkg/refs/refs_map.go index f94fea87..9c61ba87 100644 --- a/vendor/gvisor.dev/gvisor/pkg/refs/refs_map.go +++ b/vendor/gvisor.dev/gvisor/pkg/refs/refs_map.go @@ -162,7 +162,7 @@ func doLeakCheck() { skip = o.LeakCheckDisabled() } if skip { - log.Debugf(obj.LeakMessage()) + log.Debugf("%s", obj.LeakMessage()) continue } msg += obj.LeakMessage() + "\n" @@ -174,6 +174,6 @@ func doLeakCheck() { if leakCheckPanicEnabled() { panic(msg) } - log.Warningf(msg) + log.Warningf("%s", msg) } } diff --git a/vendor/gvisor.dev/gvisor/pkg/sleep/sleep_unsafe_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/sleep/sleep_unsafe_state_autogen.go index c6d7cf50..37c6d5d9 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sleep/sleep_unsafe_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/sleep/sleep_unsafe_state_autogen.go @@ -25,8 +25,8 @@ func (s *Sleeper) beforeSave() {} // +checklocksignore func (s *Sleeper) StateSave(stateSinkObject state.Sink) { s.beforeSave() - var sharedListValue *Waker - sharedListValue = s.saveSharedList() + sharedListValue := s.saveSharedList() + _ = (*Waker)(sharedListValue) stateSinkObject.SaveValue(0, sharedListValue) stateSinkObject.Save(1, &s.localList) stateSinkObject.Save(2, &s.allWakers) @@ -58,8 +58,8 @@ func (w *Waker) beforeSave() {} // +checklocksignore func (w *Waker) StateSave(stateSinkObject state.Sink) { w.beforeSave() - var sValue wakerState - sValue = w.saveS() + sValue := w.saveS() + _ = (wakerState)(sValue) stateSinkObject.SaveValue(0, sValue) stateSinkObject.Save(1, &w.next) stateSinkObject.Save(2, &w.allWakersNext) diff --git a/vendor/gvisor.dev/gvisor/pkg/state/addr_set.go b/vendor/gvisor.dev/gvisor/pkg/state/addr_set.go index 49b8bd5e..ef5723d3 100644 --- a/vendor/gvisor.dev/gvisor/pkg/state/addr_set.go +++ 
b/vendor/gvisor.dev/gvisor/pkg/state/addr_set.go @@ -384,7 +384,7 @@ func (s *addrSet) InsertWithoutMergingUnchecked(gap addrGapIterator, r addrRange if splitMaxGap { gap.node.updateMaxGapLeaf() } - return addrIterator{gap.node, gap.index} + return addrIterator(gap) } // InsertRange inserts the given segment into the set. If the new segment can @@ -513,7 +513,7 @@ func (s *addrSet) Remove(seg addrIterator) addrGapIterator { if addrtrackGaps != 0 { seg.node.updateMaxGapLeaf() } - return seg.node.rebalanceAfterRemove(addrGapIterator{seg.node, seg.index}) + return seg.node.rebalanceAfterRemove(addrGapIterator(seg)) } // RemoveAll removes all segments from the set. All existing iterators are @@ -530,21 +530,52 @@ func (s *addrSet) RemoveAll() { // if the caller needs to do additional work before removing each segment, // iterate segments and call Remove in a loop instead. func (s *addrSet) RemoveRange(r addrRange) addrGapIterator { - seg, gap := s.Find(r.Start) - if seg.Ok() { - seg = s.Isolate(seg, r) - gap = s.Remove(seg) - } - for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { - seg = s.SplitAfter(seg, r.End) - gap = s.Remove(seg) - } - return gap + return s.RemoveRangeWith(r, nil) } // RemoveFullRange is equivalent to RemoveRange, except that if any key in the // given range does not correspond to a segment, RemoveFullRange panics. func (s *addrSet) RemoveFullRange(r addrRange) addrGapIterator { + return s.RemoveFullRangeWith(r, nil) +} + +// RemoveRangeWith removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +// +// The function f is applied to each segment immediately before it is removed, +// in order of ascending keys. Segments that lie partially outside r are split +// before f is called, such that f only observes segments entirely within r. +// Non-empty gaps between segments are skipped. 
+// +// RemoveRangeWith searches the set to find segments to remove. If the caller +// already has an iterator to either end of the range of segments to remove, or +// if the caller needs to do additional work before removing each segment, +// iterate segments and call Remove in a loop instead. +// +// N.B. f must not invalidate iterators into s. +func (s *addrSet) RemoveRangeWith(r addrRange, f func(seg addrIterator)) addrGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + if f != nil { + f(seg) + } + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.SplitAfter(seg, r.End) + if f != nil { + f(seg) + } + gap = s.Remove(seg) + } + return gap +} + +// RemoveFullRangeWith is equivalent to RemoveRangeWith, except that if any key +// in the given range does not correspond to a segment, RemoveFullRangeWith +// panics. +func (s *addrSet) RemoveFullRangeWith(r addrRange, f func(seg addrIterator)) addrGapIterator { seg := s.FindSegment(r.Start) if !seg.Ok() { panic(fmt.Sprintf("missing segment at %v", r.Start)) @@ -552,6 +583,9 @@ func (s *addrSet) RemoveFullRange(r addrRange) addrGapIterator { seg = s.SplitBefore(seg, r.Start) for { seg = s.SplitAfter(seg, r.End) + if f != nil { + f(seg) + } end := seg.End() gap := s.Remove(seg) if r.End <= end { @@ -564,6 +598,19 @@ func (s *addrSet) RemoveFullRange(r addrRange) addrGapIterator { } } +// MoveFrom moves all segments from s2 to s, replacing all existing segments in +// s and leaving s2 empty. +func (s *addrSet) MoveFrom(s2 *addrSet) { + *s = *s2 + for _, child := range s.root.children { + if child == nil { + break + } + child.parent = &s.root + } + s2.RemoveAll() +} + // Merge attempts to merge two neighboring segments. If successful, Merge // returns an iterator to the merged segment, and all existing iterators are // invalidated. Otherwise, Merge returns a terminal iterator. 
@@ -817,11 +864,11 @@ func (s *addrSet) Isolate(seg addrIterator, r addrRange) addrIterator { // LowerBoundSegmentSplitBefore provides an iterator to the first segment to be // mutated, suitable as the initial value for a loop variable. func (s *addrSet) LowerBoundSegmentSplitBefore(min uintptr) addrIterator { - seg := s.LowerBoundSegment(min) + seg, gap := s.Find(min) if seg.Ok() { - seg = s.SplitBefore(seg, min) + return s.SplitBefore(seg, min) } - return seg + return gap.NextSegment() } // UpperBoundSegmentSplitAfter combines UpperBoundSegment and SplitAfter. @@ -831,11 +878,11 @@ func (s *addrSet) LowerBoundSegmentSplitBefore(min uintptr) addrIterator { // UpperBoundSegmentSplitAfter provides an iterator to the first segment to be // mutated, suitable as the initial value for a loop variable. func (s *addrSet) UpperBoundSegmentSplitAfter(max uintptr) addrIterator { - seg := s.UpperBoundSegment(max) + seg, gap := s.Find(max) if seg.Ok() { - seg = s.SplitAfter(seg, max) + return s.SplitAfter(seg, max) } - return seg + return gap.PrevSegment() } // VisitRange applies the function f to all segments intersecting the range r, @@ -1575,7 +1622,7 @@ func (seg addrIterator) PrevGap() addrGapIterator { return seg.node.children[seg.index].lastSegment().NextGap() } - return addrGapIterator{seg.node, seg.index} + return addrGapIterator(seg) } // NextGap returns the gap immediately after the iterated segment. 
@@ -1867,26 +1914,26 @@ func (n *addrnode) String() string { func (n *addrnode) writeDebugString(buf *bytes.Buffer, prefix string) { if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { buf.WriteString(prefix) - buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + fmt.Fprintf(buf, "WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren) } for i := 0; i < n.nrSegments; i++ { if child := n.children[i]; child != nil { cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) if child.parent != n || child.parentIndex != i { buf.WriteString(cprefix) - buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + fmt.Fprintf(buf, "WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i) } child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) } buf.WriteString(prefix) if n.hasChildren { if addrtrackGaps != 0 { - buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + fmt.Fprintf(buf, "- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get()) } else { - buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + fmt.Fprintf(buf, "- % 3d: %v => %v\n", i, n.keys[i], n.values[i]) } } else { - buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + fmt.Fprintf(buf, "- % 3d: %v => %v\n", i, n.keys[i], n.values[i]) } } if child := n.children[n.nrSegments]; child != nil { diff --git a/vendor/gvisor.dev/gvisor/pkg/state/decode.go b/vendor/gvisor.dev/gvisor/pkg/state/decode.go index fd378768..5306d20d 100644 --- a/vendor/gvisor.dev/gvisor/pkg/state/decode.go +++ b/vendor/gvisor.dev/gvisor/pkg/state/decode.go @@ -32,7 +32,7 @@ type internalCallback interface { source() *objectDecodeState // callbackRun executes 
the callback. - callbackRun() + callbackRun(ds *decodeState) } // userCallback is an implementation of internalCallback. @@ -44,7 +44,7 @@ func (userCallback) source() *objectDecodeState { } // callbackRun implements internalCallback.callbackRun. -func (uc userCallback) callbackRun() { +func (uc userCallback) callbackRun(*decodeState) { uc() } @@ -84,7 +84,13 @@ type objectDecodeState struct { // callbacks is a set of callbacks to execute on load. callbacks []internalCallback - completeEntry + pendingEntry odsListElem + leafEntry odsListElem +} + +type odsListElem struct { + ods *objectDecodeState + odsEntry } // addCallback adds a callback to the objectDecodeState. @@ -122,8 +128,13 @@ func (ods *objectDecodeState) source() *objectDecodeState { } // callbackRun implements internalCallback.callbackRun. -func (ods *objectDecodeState) callbackRun() { +func (ods *objectDecodeState) callbackRun(ds *decodeState) { ods.blockedBy-- + if ods.blockedBy == 0 { + ds.leaves.PushBack(&ods.leafEntry) + } else if ods.blockedBy < 0 { + Failf("object %d has negative blockedBy: %d", ods.id, ods.blockedBy) + } } // decodeState is a graph of objects in the process of being decoded. @@ -155,7 +166,11 @@ type decodeState struct { deferred map[objectID]wire.Object // pending is the set of objects that are not yet complete. - pending completeList + pending odsList + + // leaves is the set of objects that have no dependencies (blockedBy == 0). + // leaves are consumed from the front and appended to the back. + leaves odsList // stats tracks time data. stats Stats @@ -185,20 +200,12 @@ func (ds *decodeState) checkComplete(ods *objectDecodeState) bool { // Fire all callbacks. for _, ic := range ods.callbacks { - ic.callbackRun() + ic.callbackRun(ds) } // Mark completed. - cbs := ods.callbacks ods.callbacks = nil - ds.pending.Remove(ods) - - // Recursively check others. 
- for _, ic := range cbs { - if other := ic.source(); other != nil && other.blockedBy == 0 { - ds.checkComplete(other) - } - } + ds.pending.Remove(&ods.pendingEntry) return true // All set. } @@ -223,6 +230,9 @@ func (ds *decodeState) wait(waiter *objectDecodeState, id objectID, callback fun // Mark as blocked. waiter.blockedBy++ + if waiter.blockedBy == 1 { + ds.leaves.Remove(&waiter.leafEntry) + } // No nil can be returned here. other := ds.lookup(id) @@ -280,6 +290,26 @@ func walkChild(path []wire.Dot, obj reflect.Value) reflect.Value { return obj } +func (ds *decodeState) growObjectsByID(id objectID) { + if len(ds.objectsByID) < int(id) { + ds.objectsByID = append(ds.objectsByID, make([]*objectDecodeState, int(id)-len(ds.objectsByID))...) + } +} + +func (ds *decodeState) addObject(id objectID, obj reflect.Value) *objectDecodeState { + ods := &objectDecodeState{ + id: id, + obj: obj, + } + ods.pendingEntry.ods = ods + ods.leafEntry.ods = ods + ds.growObjectsByID(id) + ds.objectsByID[id-1] = ods + ds.pending.PushBack(&ods.pendingEntry) + ds.leaves.PushBack(&ods.leafEntry) + return ods +} + // register registers a decode with a type. // // This type is only used to instantiate a new object if it has not been @@ -288,11 +318,9 @@ func walkChild(path []wire.Dot, obj reflect.Value) reflect.Value { func (ds *decodeState) register(r *wire.Ref, typ reflect.Type) reflect.Value { // Grow the objectsByID slice. id := objectID(r.Root) - if len(ds.objectsByID) < int(id) { - ds.objectsByID = append(ds.objectsByID, make([]*objectDecodeState, int(id)-len(ds.objectsByID))...) - } // Does this object already exist? 
+ ds.growObjectsByID(id) ods := ds.objectsByID[id-1] if ods != nil { return walkChild(r.Dots, ods.obj) @@ -303,12 +331,7 @@ func (ds *decodeState) register(r *wire.Ref, typ reflect.Type) reflect.Value { typ = ds.findType(r.Type) } v := reflect.New(typ) - ods = &objectDecodeState{ - id: id, - obj: v.Elem(), - } - ds.objectsByID[id-1] = ods - ds.pending.PushBack(ods) + ods = ds.addObject(id, v.Elem()) // Process any deferred objects & callbacks. if encoded, ok := ds.deferred[id]; ok { @@ -581,13 +604,8 @@ func (ds *decodeState) Load(obj reflect.Value) { return ds.types.LookupName(id) }) - // Create the root object. - rootOds := &objectDecodeState{ - id: 1, - obj: obj, - } - ds.objectsByID = append(ds.objectsByID, rootOds) - ds.pending.PushBack(rootOds) + // Add the root object with ID 1. + _ = ds.addObject(1, obj) // Read the number of objects. numObjects, object, err := ReadHeader(&ds.r) @@ -603,7 +621,6 @@ func (ds *decodeState) Load(obj reflect.Value) { encoded wire.Object ods *objectDecodeState id objectID - tid = typeID(1) ) if err := safely(func() { // Decode all objects in the stream. @@ -616,7 +633,6 @@ func (ds *decodeState) Load(obj reflect.Value) { switch we := encoded.(type) { case *wire.Type: ds.types.Register(we) - tid++ encoded = nil continue case wire.Uint: @@ -673,22 +689,13 @@ func (ds *decodeState) Load(obj reflect.Value) { // objects become complete (there is a dependency cycle). // // Note that we iterate backwards here, because there will be a strong - // tendendcy for blocking relationships to go from earlier objects to + // tendency for blocking relationships to go from earlier objects to // later (deeper) objects in the graph. This will reduce the number of // iterations required to finish all objects. 
if err := safely(func() { - for ds.pending.Back() != nil { - thisCycle := false - for ods = ds.pending.Back(); ods != nil; { - if ds.checkComplete(ods) { - thisCycle = true - break - } - ods = ods.Prev() - } - if !thisCycle { - break - } + for elem := ds.leaves.Front(); elem != nil; elem = elem.Next() { + ods = elem.ods + ds.checkComplete(elem.ods) } }); err != nil { Failf("error executing callbacks: %w\nfor object %#v", err, ods.obj.Interface()) @@ -696,9 +703,9 @@ func (ds *decodeState) Load(obj reflect.Value) { // Check if we have any remaining dependency cycles. If there are any // objects left in the pending list, then it must be due to a cycle. - if ods := ds.pending.Front(); ods != nil { + if elem := ds.pending.Front(); elem != nil { // This must be the result of a dependency cycle. - cycle := ods.findCycle() + cycle := elem.ods.findCycle() var buf bytes.Buffer buf.WriteString("dependency cycle: {") for i, cycleOS := range cycle { @@ -708,7 +715,7 @@ func (ds *decodeState) Load(obj reflect.Value) { fmt.Fprintf(&buf, "%q", cycleOS.obj.Type()) } buf.WriteString("}") - Failf("incomplete graph: %s", string(buf.Bytes())) + Failf("incomplete graph: %s", buf.String()) } } diff --git a/vendor/gvisor.dev/gvisor/pkg/state/encode.go b/vendor/gvisor.dev/gvisor/pkg/state/encode.go index 861be309..c5b4a079 100644 --- a/vendor/gvisor.dev/gvisor/pkg/state/encode.go +++ b/vendor/gvisor.dev/gvisor/pkg/state/encode.go @@ -802,14 +802,19 @@ func (es *encodeState) Save(obj reflect.Value) { }) for _, id := range ids { // Encode the id. + oes = nil wire.Save(&es.w, wire.Uint(id)) // Marshal the object. - oes := es.pending[id] + oes = es.pending[id] wire.Save(&es.w, oes.encoded) } }); err != nil { - // Include the object and the error. - Failf("error serializing object %#v: %w", oes.encoded, err) + if oes != nil { + // Include the object and the error. 
+ Failf("error serializing object %#v: %w", oes.encoded, err) + } else { + Failf("error serializing type or ID: %w", err) + } } } diff --git a/vendor/gvisor.dev/gvisor/pkg/state/complete_list.go b/vendor/gvisor.dev/gvisor/pkg/state/ods_list.go similarity index 60% rename from vendor/gvisor.dev/gvisor/pkg/state/complete_list.go rename to vendor/gvisor.dev/gvisor/pkg/state/ods_list.go index dbb738d9..d71f1f55 100644 --- a/vendor/gvisor.dev/gvisor/pkg/state/complete_list.go +++ b/vendor/gvisor.dev/gvisor/pkg/state/ods_list.go @@ -6,14 +6,14 @@ package state // objects, if they are not the same. An ElementMapper is not typically // required if: Linker is left as is, Element is left as is, or Linker and // Element are the same type. -type completeElementMapper struct{} +type odsElementMapper struct{} // linkerFor maps an Element to a Linker. // // This default implementation should be inlined. // //go:nosplit -func (completeElementMapper) linkerFor(elem *objectDecodeState) *objectDecodeState { return elem } +func (odsElementMapper) linkerFor(elem *odsListElem) *odsListElem { return elem } // List is an intrusive list. Entries can be added to or removed from the list // in O(1) time and with no additional memory allocations. @@ -27,13 +27,13 @@ func (completeElementMapper) linkerFor(elem *objectDecodeState) *objectDecodeSta // } // // +stateify savable -type completeList struct { - head *objectDecodeState - tail *objectDecodeState +type odsList struct { + head *odsListElem + tail *odsListElem } // Reset resets list l to the empty state. -func (l *completeList) Reset() { +func (l *odsList) Reset() { l.head = nil l.tail = nil } @@ -41,21 +41,21 @@ func (l *completeList) Reset() { // Empty returns true iff the list is empty. // //go:nosplit -func (l *completeList) Empty() bool { +func (l *odsList) Empty() bool { return l.head == nil } // Front returns the first element of list l or nil. 
// //go:nosplit -func (l *completeList) Front() *objectDecodeState { +func (l *odsList) Front() *odsListElem { return l.head } // Back returns the last element of list l or nil. // //go:nosplit -func (l *completeList) Back() *objectDecodeState { +func (l *odsList) Back() *odsListElem { return l.tail } @@ -64,8 +64,8 @@ func (l *completeList) Back() *objectDecodeState { // NOTE: This is an O(n) operation. // //go:nosplit -func (l *completeList) Len() (count int) { - for e := l.Front(); e != nil; e = (completeElementMapper{}.linkerFor(e)).Next() { +func (l *odsList) Len() (count int) { + for e := l.Front(); e != nil; e = (odsElementMapper{}.linkerFor(e)).Next() { count++ } return count @@ -74,12 +74,12 @@ func (l *completeList) Len() (count int) { // PushFront inserts the element e at the front of list l. // //go:nosplit -func (l *completeList) PushFront(e *objectDecodeState) { - linker := completeElementMapper{}.linkerFor(e) +func (l *odsList) PushFront(e *odsListElem) { + linker := odsElementMapper{}.linkerFor(e) linker.SetNext(l.head) linker.SetPrev(nil) if l.head != nil { - completeElementMapper{}.linkerFor(l.head).SetPrev(e) + odsElementMapper{}.linkerFor(l.head).SetPrev(e) } else { l.tail = e } @@ -90,13 +90,13 @@ func (l *completeList) PushFront(e *objectDecodeState) { // PushFrontList inserts list m at the start of list l, emptying m. // //go:nosplit -func (l *completeList) PushFrontList(m *completeList) { +func (l *odsList) PushFrontList(m *odsList) { if l.head == nil { l.head = m.head l.tail = m.tail } else if m.head != nil { - completeElementMapper{}.linkerFor(l.head).SetPrev(m.tail) - completeElementMapper{}.linkerFor(m.tail).SetNext(l.head) + odsElementMapper{}.linkerFor(l.head).SetPrev(m.tail) + odsElementMapper{}.linkerFor(m.tail).SetNext(l.head) l.head = m.head } @@ -107,12 +107,12 @@ func (l *completeList) PushFrontList(m *completeList) { // PushBack inserts the element e at the back of list l. 
// //go:nosplit -func (l *completeList) PushBack(e *objectDecodeState) { - linker := completeElementMapper{}.linkerFor(e) +func (l *odsList) PushBack(e *odsListElem) { + linker := odsElementMapper{}.linkerFor(e) linker.SetNext(nil) linker.SetPrev(l.tail) if l.tail != nil { - completeElementMapper{}.linkerFor(l.tail).SetNext(e) + odsElementMapper{}.linkerFor(l.tail).SetNext(e) } else { l.head = e } @@ -123,13 +123,13 @@ func (l *completeList) PushBack(e *objectDecodeState) { // PushBackList inserts list m at the end of list l, emptying m. // //go:nosplit -func (l *completeList) PushBackList(m *completeList) { +func (l *odsList) PushBackList(m *odsList) { if l.head == nil { l.head = m.head l.tail = m.tail } else if m.head != nil { - completeElementMapper{}.linkerFor(l.tail).SetNext(m.head) - completeElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + odsElementMapper{}.linkerFor(l.tail).SetNext(m.head) + odsElementMapper{}.linkerFor(m.head).SetPrev(l.tail) l.tail = m.tail } @@ -140,9 +140,9 @@ func (l *completeList) PushBackList(m *completeList) { // InsertAfter inserts e after b. // //go:nosplit -func (l *completeList) InsertAfter(b, e *objectDecodeState) { - bLinker := completeElementMapper{}.linkerFor(b) - eLinker := completeElementMapper{}.linkerFor(e) +func (l *odsList) InsertAfter(b, e *odsListElem) { + bLinker := odsElementMapper{}.linkerFor(b) + eLinker := odsElementMapper{}.linkerFor(e) a := bLinker.Next() @@ -151,7 +151,7 @@ func (l *completeList) InsertAfter(b, e *objectDecodeState) { bLinker.SetNext(e) if a != nil { - completeElementMapper{}.linkerFor(a).SetPrev(e) + odsElementMapper{}.linkerFor(a).SetPrev(e) } else { l.tail = e } @@ -160,9 +160,9 @@ func (l *completeList) InsertAfter(b, e *objectDecodeState) { // InsertBefore inserts e before a. 
// //go:nosplit -func (l *completeList) InsertBefore(a, e *objectDecodeState) { - aLinker := completeElementMapper{}.linkerFor(a) - eLinker := completeElementMapper{}.linkerFor(e) +func (l *odsList) InsertBefore(a, e *odsListElem) { + aLinker := odsElementMapper{}.linkerFor(a) + eLinker := odsElementMapper{}.linkerFor(e) b := aLinker.Prev() eLinker.SetNext(a) @@ -170,7 +170,7 @@ func (l *completeList) InsertBefore(a, e *objectDecodeState) { aLinker.SetPrev(e) if b != nil { - completeElementMapper{}.linkerFor(b).SetNext(e) + odsElementMapper{}.linkerFor(b).SetNext(e) } else { l.head = e } @@ -179,19 +179,19 @@ func (l *completeList) InsertBefore(a, e *objectDecodeState) { // Remove removes e from l. // //go:nosplit -func (l *completeList) Remove(e *objectDecodeState) { - linker := completeElementMapper{}.linkerFor(e) +func (l *odsList) Remove(e *odsListElem) { + linker := odsElementMapper{}.linkerFor(e) prev := linker.Prev() next := linker.Next() if prev != nil { - completeElementMapper{}.linkerFor(prev).SetNext(next) + odsElementMapper{}.linkerFor(prev).SetNext(next) } else if l.head == e { l.head = next } if next != nil { - completeElementMapper{}.linkerFor(next).SetPrev(prev) + odsElementMapper{}.linkerFor(next).SetPrev(prev) } else if l.tail == e { l.tail = prev } @@ -205,35 +205,35 @@ func (l *completeList) Remove(e *objectDecodeState) { // methods needed by List. // // +stateify savable -type completeEntry struct { - next *objectDecodeState - prev *objectDecodeState +type odsEntry struct { + next *odsListElem + prev *odsListElem } // Next returns the entry that follows e in the list. // //go:nosplit -func (e *completeEntry) Next() *objectDecodeState { +func (e *odsEntry) Next() *odsListElem { return e.next } // Prev returns the entry that precedes e in the list. // //go:nosplit -func (e *completeEntry) Prev() *objectDecodeState { +func (e *odsEntry) Prev() *odsListElem { return e.prev } // SetNext assigns 'entry' as the entry that follows e in the list. 
// //go:nosplit -func (e *completeEntry) SetNext(elem *objectDecodeState) { +func (e *odsEntry) SetNext(elem *odsListElem) { e.next = elem } // SetPrev assigns 'entry' as the entry that precedes e in the list. // //go:nosplit -func (e *completeEntry) SetPrev(elem *objectDecodeState) { +func (e *odsEntry) SetPrev(elem *odsListElem) { e.prev = elem } diff --git a/vendor/gvisor.dev/gvisor/pkg/state/state.go b/vendor/gvisor.dev/gvisor/pkg/state/state.go index 6251ce27..89ad292e 100644 --- a/vendor/gvisor.dev/gvisor/pkg/state/state.go +++ b/vendor/gvisor.dev/gvisor/pkg/state/state.go @@ -188,7 +188,7 @@ func (s Sink) Context() context.Context { // Type is an interface that must be implemented by Struct objects. This allows // these objects to be serialized while minimizing runtime reflection required. // -// All these methods can be automatically generated by the go_statify tool. +// All these methods can be automatically generated by the go_stateify tool. type Type interface { // StateTypeName returns the type's name. 
// diff --git a/vendor/gvisor.dev/gvisor/pkg/state/stats.go b/vendor/gvisor.dev/gvisor/pkg/state/stats.go index eaec664a..17c30b6e 100644 --- a/vendor/gvisor.dev/gvisor/pkg/state/stats.go +++ b/vendor/gvisor.dev/gvisor/pkg/state/stats.go @@ -124,7 +124,7 @@ func (s *Stats) String() string { total time.Duration ) buf.WriteString("\n") - buf.WriteString(fmt.Sprintf("% 16s | % 8s | % 16s | %s\n", "total", "count", "per", "type")) + fmt.Fprintf(&buf, "% 16s | % 8s | % 16s | %s\n", "total", "count", "per", "type") buf.WriteString("-----------------+----------+------------------+----------------\n") for _, se := range ss { if se.entry.count == 0 { @@ -135,11 +135,11 @@ func (s *Stats) String() string { count += se.entry.count total += se.entry.total per := se.entry.total / time.Duration(se.entry.count) - buf.WriteString(fmt.Sprintf("% 16s | %8d | % 16s | %s\n", - se.entry.total, se.entry.count, per, se.name)) + fmt.Fprintf(&buf, "% 16s | %8d | % 16s | %s\n", + se.entry.total, se.entry.count, per, se.name) } buf.WriteString("-----------------+----------+------------------+----------------\n") - buf.WriteString(fmt.Sprintf("% 16s | % 8d | % 16s | [all]", - total, count, total/time.Duration(count))) - return string(buf.Bytes()) + fmt.Fprintf(&buf, "% 16s | % 8d | % 16s | [all]", + total, count, total/time.Duration(count)) + return buf.String() } diff --git a/vendor/gvisor.dev/gvisor/pkg/state/types.go b/vendor/gvisor.dev/gvisor/pkg/state/types.go index 09f88908..6eee01cc 100644 --- a/vendor/gvisor.dev/gvisor/pkg/state/types.go +++ b/vendor/gvisor.dev/gvisor/pkg/state/types.go @@ -295,23 +295,23 @@ const interfaceType = "interface" var primitiveTypeDatabase = func() map[string]reflect.Type { r := make(map[string]reflect.Type) for _, t := range []reflect.Type{ - reflect.TypeOf(false), - reflect.TypeOf(int(0)), - reflect.TypeOf(int8(0)), - reflect.TypeOf(int16(0)), - reflect.TypeOf(int32(0)), - reflect.TypeOf(int64(0)), - reflect.TypeOf(uint(0)), - reflect.TypeOf(uintptr(0)), 
- reflect.TypeOf(uint8(0)), - reflect.TypeOf(uint16(0)), - reflect.TypeOf(uint32(0)), - reflect.TypeOf(uint64(0)), - reflect.TypeOf(""), - reflect.TypeOf(float32(0.0)), - reflect.TypeOf(float64(0.0)), - reflect.TypeOf(complex64(0.0)), - reflect.TypeOf(complex128(0.0)), + reflect.TypeFor[bool](), + reflect.TypeFor[int](), + reflect.TypeFor[int8](), + reflect.TypeFor[int16](), + reflect.TypeFor[int32](), + reflect.TypeFor[int64](), + reflect.TypeFor[uint](), + reflect.TypeFor[uintptr](), + reflect.TypeFor[uint8](), + reflect.TypeFor[uint16](), + reflect.TypeFor[uint32](), + reflect.TypeFor[uint64](), + reflect.TypeFor[string](), + reflect.TypeFor[float32](), + reflect.TypeFor[float64](), + reflect.TypeFor[complex64](), + reflect.TypeFor[complex128](), } { r[t.Name()] = t } diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/aliases.go b/vendor/gvisor.dev/gvisor/pkg/sync/aliases.go index ccbac0a6..3c425d12 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/aliases.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/aliases.go @@ -1,7 +1,8 @@ // Copyright 2020 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. package sync diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/checklocks_off_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/checklocks_off_unsafe.go index 87c56dd1..d305569f 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/checklocks_off_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/checklocks_off_unsafe.go @@ -1,7 +1,8 @@ // Copyright 2020 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. 
//go:build !checklocks // +build !checklocks diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/checklocks_on_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/checklocks_on_unsafe.go index 16a5d3fb..1e5e4c53 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/checklocks_on_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/checklocks_on_unsafe.go @@ -1,7 +1,8 @@ // Copyright 2020 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. //go:build checklocks // +build checklocks diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/gate_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/gate_unsafe.go index 0f3b58dc..d88d0625 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/gate_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/gate_unsafe.go @@ -110,8 +110,8 @@ func (g *Gate) leaveClosed() { if atomic.LoadUintptr(&g.closingG) == 0 { return } - if g := atomic.SwapUintptr(&g.closingG, 0); g > preparingG { - goready(g, 0) + if cG := atomic.SwapUintptr(&g.closingG, 0); cG > preparingG { + goready(cG, 0) } } @@ -133,8 +133,8 @@ func (g *Gate) Close() { panic("concurrent Close of sync.Gate") } - if g := atomic.SwapUintptr(&g.closingG, preparingG); g != 0 { - panic(fmt.Sprintf("invalid sync.Gate.closingG during Close: %#x", g)) + if cG := atomic.SwapUintptr(&g.closingG, preparingG); cG != 0 { + panic(fmt.Sprintf("invalid sync.Gate.closingG during Close: %#x", cG)) } if atomic.LoadInt32(&g.userCount) == math.MinInt32 { // The last call to Leave arrived while we were setting up closingG. @@ -142,6 +142,7 @@ func (g *Gate) Close() { } // WaitReasonSemacquire/TraceBlockSync are consistent with WaitGroup. 
gopark(gateCommit, gohacks.Noescape(unsafe.Pointer(&g.closingG)), WaitReasonSemacquire, TraceBlockSync, 0) + RaceAcquire(unsafe.Pointer(&g.closingG)) } //go:norace diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/goyield_go113_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/goyield_go113_unsafe.go index c4b03e9a..2a1da77a 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/goyield_go113_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/goyield_go113_unsafe.go @@ -1,7 +1,8 @@ // Copyright 2020 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. //go:build go1.13 && !go1.14 // +build go1.13,!go1.14 diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/goyield_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/goyield_unsafe.go index 757edbab..db9eae60 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/goyield_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/goyield_unsafe.go @@ -1,7 +1,8 @@ // Copyright 2020 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. //go:build go1.14 // +build go1.14 diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/mutex_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/mutex_unsafe.go index 9bf41270..49f0af48 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/mutex_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/mutex_unsafe.go @@ -1,7 +1,8 @@ // Copyright 2019 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. 
package sync diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/norace_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/norace_unsafe.go index 8eca9913..e9403aed 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/norace_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/norace_unsafe.go @@ -1,7 +1,8 @@ // Copyright 2019 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. //go:build !race // +build !race diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/race_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/race_unsafe.go index 381163ca..eb66ce11 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/race_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/race_unsafe.go @@ -1,7 +1,8 @@ // Copyright 2019 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. //go:build race // +build race diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_amd64.go b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_amd64.go index dad10bfe..cdca5cae 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_amd64.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_amd64.go @@ -1,7 +1,8 @@ // Copyright 2020 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. 
//go:build amd64 diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_constants.go b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_constants_go_impl.go similarity index 75% rename from vendor/gvisor.dev/gvisor/pkg/sync/runtime_constants.go rename to vendor/gvisor.dev/gvisor/pkg/sync/runtime_constants_go_impl.go index d6eef328..907c4b05 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_constants.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_constants_go_impl.go @@ -16,7 +16,7 @@ package sync // Values for the reason argument to gopark, from Go's src/runtime/runtime2.go. const ( - WaitReasonSelect uint8 = 9 // +checkconst runtime waitReasonSelect - WaitReasonChanReceive uint8 = 14 // +checkconst runtime waitReasonChanReceive - WaitReasonSemacquire uint8 = 18 // +checkconst runtime waitReasonSemacquire + WaitReasonSelect uint8 = 9 + WaitReasonChanReceive uint8 = 14 + WaitReasonSemacquire uint8 = 19 ) diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_exectracer2.go b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_exectracer2_go_impl.go similarity index 81% rename from vendor/gvisor.dev/gvisor/pkg/sync/runtime_exectracer2.go rename to vendor/gvisor.dev/gvisor/pkg/sync/runtime_exectracer2_go_impl.go index 58630af2..c43868ad 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_exectracer2.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_exectracer2_go_impl.go @@ -16,6 +16,6 @@ package sync // TraceBlockReason constants, from Go's src/runtime/trace2runtime.go. 
const ( - TraceBlockSelect TraceBlockReason = 3 // +checkconst runtime traceBlockSelect - TraceBlockSync TraceBlockReason = 5 // +checkconst runtime traceBlockSync + TraceBlockSelect TraceBlockReason = 3 + TraceBlockSync TraceBlockReason = 5 ) diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_go121_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_go121_unsafe.go index 344b5566..c9f2c761 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_go121_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_go121_unsafe.go @@ -1,9 +1,10 @@ // Copyright 2023 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. -//go:build go1.21 +//go:build go1.21 && !go1.24 package sync diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_go124_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_go124_unsafe.go new file mode 100644 index 00000000..96eef8c1 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_go124_unsafe.go @@ -0,0 +1,16 @@ +// Copyright 2024 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. + +// https://go.dev/cl/691596 (1.26) renames the internal map type which nogo relies on. +//go:build go1.24 && !go1.26 + +package sync + +import "unsafe" + +// Use checkoffset to assert that maptype.hasher (the only field we use) has +// the correct offset. 
+const maptypeHasherOffset = unsafe.Offsetof(maptype{}.Hasher) // +checkoffset internal/abi SwissMapType.Hasher diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_go126_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_go126_unsafe.go new file mode 100644 index 00000000..4bfcb47d --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_go126_unsafe.go @@ -0,0 +1,16 @@ +// Copyright 2024 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. + +// https://go.dev/cl/691596 (1.26) renames the internal map type which nogo relies on. +//go:build go1.26 + +package sync + +import "unsafe" + +// Use checkoffset to assert that maptype.hasher (the only field we use) has +// the correct offset. +const maptypeHasherOffset = unsafe.Offsetof(maptype{}.Hasher) // +checkoffset internal/abi MapType.Hasher diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_not_go121_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_not_go121_unsafe.go index 4d7e8b9f..74f6d321 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_not_go121_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_not_go121_unsafe.go @@ -1,7 +1,8 @@ // Copyright 2023 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. // runtime.maptype is moved to internal/abi.MapType in Go 1.21. // diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_other.go b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_other.go index cbd06216..705b1f21 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_other.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_other.go @@ -1,7 +1,8 @@ // Copyright 2020 The gVisor Authors. 
// // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. //go:build !amd64 // +build !amd64 diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_amd64.s b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_asm_impl_amd64.s similarity index 91% rename from vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_amd64.s rename to vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_asm_impl_amd64.s index 37f69471..20c6d1c6 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_amd64.s +++ b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_asm_impl_amd64.s @@ -16,7 +16,7 @@ #include "textflag.h" -#define NMSPINNING_OFFSET 92 // +checkoffset runtime schedt.nmspinning +#define NMSPINNING_OFFSET 100 TEXT ·addrOfSpinning(SB),NOSPLIT|NOFRAME,$0-8 LEAQ runtime·sched(SB), AX diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_asm_impl_arm64.s b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_asm_impl_arm64.s new file mode 100644 index 00000000..c04da8ac --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_asm_impl_arm64.s @@ -0,0 +1,18 @@ +// Copyright 2023 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !amd64 + +// This file is intentionally left blank. 
arm64 doesn't use +// addrOfSpinning, but we still need an input to the nogo template rule. diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_other.s b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_other.s index b6391d2b..bafac6a8 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_other.s +++ b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_spinning_other.s @@ -15,4 +15,10 @@ //go:build !amd64 // This file is intentionally left blank. Other arches don't use -// addrOfSpinning, but we still need an input to the nogo template rule. +// addrOfSpinning, but because this package is partially used in Netstack, we +// should support arches that aren't amd64 or arm64. Having this file here +// ensures that `go build` doesn't compile the package with the `-complete` +// flag, because the package isn't made up of just '.go' files. +// This allows Netstack to use the architecture-independent portions of this +// package, because the architecture-dependent portions are never compiled in +// the first place. diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_unsafe.go index 5bc0a92e..acaf173d 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/runtime_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/runtime_unsafe.go @@ -1,7 +1,8 @@ // Copyright 2020 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. // //go:linkname directives type-checked by checklinkname. // Runtime type copies checked by checkoffset. 
diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/rwmutex_unsafe.go b/vendor/gvisor.dev/gvisor/pkg/sync/rwmutex_unsafe.go index 24400bb7..921c65b2 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/rwmutex_unsafe.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/rwmutex_unsafe.go @@ -1,7 +1,9 @@ // Copyright 2009 The Go Authors. All rights reserved. // Copyright 2019 The gVisor Authors. +// // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. // This is mostly copied from the standard library's sync/rwmutex.go. // diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/seqcount.go b/vendor/gvisor.dev/gvisor/pkg/sync/seqcount.go index c90d2d9f..71fa4c30 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/seqcount.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/seqcount.go @@ -1,7 +1,8 @@ // Copyright 2019 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. package sync diff --git a/vendor/gvisor.dev/gvisor/pkg/sync/sync.go b/vendor/gvisor.dev/gvisor/pkg/sync/sync.go index a9bf146d..c78b1e74 100644 --- a/vendor/gvisor.dev/gvisor/pkg/sync/sync.go +++ b/vendor/gvisor.dev/gvisor/pkg/sync/sync.go @@ -1,7 +1,8 @@ // Copyright 2019 The gVisor Authors. // // Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd. // Package sync provides synchronization primitives. 
// diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/adapters/gonet/gonet.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/adapters/gonet/gonet.go index 9ad06ab2..1a14663b 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/adapters/gonet/gonet.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/adapters/gonet/gonet.go @@ -183,7 +183,7 @@ func (d *deadlineTimer) setDeadline(cancelCh *chan struct{}, timer **time.Timer, return } - timeout := t.Sub(time.Now()) + timeout := time.Until(t) if timeout <= 0 { close(*cancelCh) return diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/errors.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/errors.go index 0df3d885..f6ed3617 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/errors.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/errors.go @@ -620,4 +620,22 @@ func (*ErrMulticastInputCannotBeOutput) IgnoreStats() bool { } func (*ErrMulticastInputCannotBeOutput) String() string { return "output cannot contain input" } +// ErrEndpointBusy indicates that the operation cannot be completed because the +// endpoint is busy. +// +// +stateify savable +type ErrEndpointBusy struct{} + +// isError implements Error. +func (*ErrEndpointBusy) isError() {} + +// IgnoreStats implements Error. +func (*ErrEndpointBusy) IgnoreStats() bool { + return true +} + +func (*ErrEndpointBusy) String() string { + return "operation cannot be completed because the endpoint is busy" +} + // LINT.ThenChange(../syserr/netstack.go) diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/header/ipv4.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/header/ipv4.go index d6801199..a0f1cabb 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/header/ipv4.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/header/ipv4.go @@ -603,6 +603,9 @@ const ( // IPv4OptionTimestampType is the option type for the Timestamp option. IPv4OptionTimestampType IPv4OptionType = 68 + // IPv4OptionExperimentType is the option type for the Experiment option. 
+ IPv4OptionExperimentType IPv4OptionType = 30 + // ipv4OptionTypeOffset is the offset in an option of its type field. ipv4OptionTypeOffset = 0 @@ -800,6 +803,17 @@ func (i *IPv4OptionIterator) Next() (IPv4Option, bool, *IPv4OptParameterProblem) } retval := IPv4OptionRouterAlert(optionBody) return &retval, false, nil + + case IPv4OptionExperimentType: + if optLen != IPv4OptionExperimentLength { + i.ErrCursor++ + return nil, false, &IPv4OptParameterProblem{ + Pointer: i.ErrCursor, + NeedICMP: true, + } + } + retval := IPv4OptionExperiment(optionBody) + return &retval, false, nil } retval := IPv4OptionGeneric(optionBody) return &retval, false, nil @@ -1074,6 +1088,35 @@ func (ra *IPv4OptionRouterAlert) Value() uint16 { return binary.BigEndian.Uint16(ra.Contents()[IPv4OptionRouterAlertValueOffset:]) } +// Experiment option specific related constants. +const ( + // IPv4OptionExperimentLength is the length of an Experiment option. + IPv4OptionExperimentLength = 4 + + // IPv4OptionExperimentValueOffset is the offset for the value of an + // Experiment option. + IPv4OptionExperimentValueOffset = 2 +) + +var _ IPv4Option = (*IPv4OptionExperiment)(nil) + +// IPv4OptionExperiment is an IPv4 option defined by RFC 4727. +type IPv4OptionExperiment []byte + +// Type implements IPv4Option. +func (*IPv4OptionExperiment) Type() IPv4OptionType { return IPv4OptionExperimentType } + +// Size implements IPv4Option. +func (*IPv4OptionExperiment) Size() uint8 { return uint8(IPv4OptionExperimentLength) } + +// Contents implements IPv4Option. +func (ex *IPv4OptionExperiment) Contents() []byte { return *ex } + +// Value returns the value of the IPv4OptionRouterAlert. +func (ex *IPv4OptionExperiment) Value() uint16 { + return binary.BigEndian.Uint16(ex.Contents()[IPv4OptionExperimentValueOffset:]) +} + // IPv4SerializableOption is an interface to represent serializable IPv4 option // types. 
type IPv4SerializableOption interface { @@ -1179,6 +1222,28 @@ func (o *IPv4SerializableRouterAlertOption) serializeInto(buffer []byte) uint8 { return o.length() } +var _ IPv4SerializableOptionPayload = (*IPv4SerializableExperimentOption)(nil) +var _ IPv4SerializableOption = (*IPv4SerializableExperimentOption)(nil) + +// IPv4SerializableExperimentOption provides serialization for the IPv4 +// Experiment option. +type IPv4SerializableExperimentOption struct { + Tag uint16 +} + +func (*IPv4SerializableExperimentOption) optionType() IPv4OptionType { + return IPv4OptionExperimentType +} + +func (*IPv4SerializableExperimentOption) length() uint8 { + return IPv4OptionExperimentLength - IPv4OptionExperimentValueOffset +} + +func (o *IPv4SerializableExperimentOption) serializeInto(buffer []byte) uint8 { + binary.BigEndian.PutUint16(buffer, o.Tag) + return o.length() +} + var _ IPv4SerializableOption = (*IPv4SerializableNOPOption)(nil) // IPv4SerializableNOPOption provides serialization for the IPv4 no-op option. diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/header/ipv6_extension_headers.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/header/ipv6_extension_headers.go index 7f75b82b..1359f995 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/header/ipv6_extension_headers.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/header/ipv6_extension_headers.go @@ -49,6 +49,10 @@ const ( // of an IPv6 payload, as per RFC 8200 section 4.7. IPv6NoNextHeaderIdentifier IPv6ExtensionHeaderIdentifier = 59 + // IPv6ExperimentExtHdrIdentifier is the header identifier of an Experiment + // extension header, as per RFC 4727 section 3.3. + IPv6ExperimentExtHdrIdentifier IPv6ExtensionHeaderIdentifier = 253 + // IPv6UnknownExtHdrIdentifier is reserved by IANA. 
// https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml#extension-header // "254 Use for experimentation and testing [RFC3692][RFC4727]" @@ -411,6 +415,17 @@ type IPv6DestinationOptionsExtHdr struct { // isIPv6PayloadHeader implements IPv6PayloadHeader.isIPv6PayloadHeader. func (IPv6DestinationOptionsExtHdr) isIPv6PayloadHeader() {} +// IPv6ExperimentExtHdr represents the Experiment extension header. +type IPv6ExperimentExtHdr struct { + Value uint16 +} + +// Release implements IPv6PayloadHeader.Release. +func (IPv6ExperimentExtHdr) Release() {} + +// isIPv6PayloadHeader implements IPv6PayloadHeader.isIPv6PayloadHeader. +func (IPv6ExperimentExtHdr) isIPv6PayloadHeader() {} + // IPv6RoutingExtHdr is a buffer holding the Routing extension header specific // data as outlined in RFC 8200 section 4.4. type IPv6RoutingExtHdr struct { @@ -580,7 +595,7 @@ func (i *IPv6PayloadIterator) Next() (IPv6PayloadHeader, bool, error) { // Is the header we are parsing a known extension header? switch i.nextHdrIdentifier { case IPv6HopByHopOptionsExtHdrIdentifier: - nextHdrIdentifier, view, err := i.nextHeaderData(false /* fragmentHdr */, nil) + nextHdrIdentifier, view, err := i.nextHeaderData(false /* ignoreLength */, nil) if err != nil { return nil, true, err } @@ -588,7 +603,7 @@ func (i *IPv6PayloadIterator) Next() (IPv6PayloadHeader, bool, error) { i.nextHdrIdentifier = nextHdrIdentifier return IPv6HopByHopOptionsExtHdr{ipv6OptionsExtHdr{view}}, false, nil case IPv6RoutingExtHdrIdentifier: - nextHdrIdentifier, view, err := i.nextHeaderData(false /* fragmentHdr */, nil) + nextHdrIdentifier, view, err := i.nextHeaderData(false /* ignoreLength */, nil) if err != nil { return nil, true, err } @@ -599,7 +614,7 @@ func (i *IPv6PayloadIterator) Next() (IPv6PayloadHeader, bool, error) { var data [6]byte // We ignore the returned bytes because we know the fragment extension // header specific data will fit in data. 
- nextHdrIdentifier, _, err := i.nextHeaderData(true /* fragmentHdr */, data[:]) + nextHdrIdentifier, _, err := i.nextHeaderData(true /* ignoreLength */, data[:]) if err != nil { return nil, true, err } @@ -618,13 +633,24 @@ func (i *IPv6PayloadIterator) Next() (IPv6PayloadHeader, bool, error) { i.nextHdrIdentifier = nextHdrIdentifier return fragmentExtHdr, false, nil case IPv6DestinationOptionsExtHdrIdentifier: - nextHdrIdentifier, view, err := i.nextHeaderData(false /* fragmentHdr */, nil) + nextHdrIdentifier, view, err := i.nextHeaderData(false /* ignoreLength */, nil) if err != nil { return nil, true, err } i.nextHdrIdentifier = nextHdrIdentifier return IPv6DestinationOptionsExtHdr{ipv6OptionsExtHdr{view}}, false, nil + case IPv6ExperimentExtHdrIdentifier: + var data [IPv6ExperimentHdrLength - ipv6ExperimentHdrValueOffset]byte + nextHdrIdentifier, _, err := i.nextHeaderData(true /* ignoreLength */, data[:]) + if err != nil { + return nil, true, err + } + i.nextHdrIdentifier = nextHdrIdentifier + hdr := IPv6ExperimentExtHdr{ + Value: binary.BigEndian.Uint16(data[:ipv6ExperimentHdrTagLength]), + } + return hdr, false, nil case IPv6NoNextHeaderIdentifier: // This indicates the end of the IPv6 payload. return nil, true, nil @@ -644,14 +670,14 @@ func (i *IPv6PayloadIterator) NextHeaderIdentifier() IPv6ExtensionHeaderIdentifi // nextHeaderData returns the extension header's Next Header field and raw data. // -// fragmentHdr indicates that the extension header being parsed is the Fragment -// extension header so the Length field should be ignored as it is Reserved -// for the Fragment extension header. +// ignoreLength indicates that the extension header being parsed should ignore +// the Length field as it is reserved. This is for the Fragment and Experiment +// extension headers. // // If bytes is not nil, extension header specific data will be read into bytes // if it has enough capacity. 
If bytes is provided but does not have enough // capacity for the data, nextHeaderData will panic. -func (i *IPv6PayloadIterator) nextHeaderData(fragmentHdr bool, bytes []byte) (IPv6ExtensionHeaderIdentifier, *buffer.View, error) { +func (i *IPv6PayloadIterator) nextHeaderData(ignoreLength bool, bytes []byte) (IPv6ExtensionHeaderIdentifier, *buffer.View, error) { // We ignore the number of bytes read because we know we will only ever read // at max 1 bytes since rune has a length of 1. If we read 0 bytes, the Read // would return io.EOF to indicate that io.Reader has reached the end of the @@ -667,13 +693,13 @@ func (i *IPv6PayloadIterator) nextHeaderData(fragmentHdr bool, bytes []byte) (IP length, err = rdr.ReadByte() if err != nil { - if fragmentHdr { + if ignoreLength { return 0, nil, fmt.Errorf("error when reading the Length field for extension header with id = %d: %w", i.nextHdrIdentifier, err) } return 0, nil, fmt.Errorf("error when reading the Reserved field for extension header with id = %d: %w", i.nextHdrIdentifier, err) } - if fragmentHdr { + if ignoreLength { length = 0 } @@ -689,7 +715,7 @@ func (i *IPv6PayloadIterator) nextHeaderData(fragmentHdr bool, bytes []byte) (IP i.nextOffset += uint32((length + 1) * ipv6ExtHdrLenBytesPerUnit) bytesLen := int(length)*ipv6ExtHdrLenBytesPerUnit + ipv6ExtHdrLenBytesExcluded - if fragmentHdr { + if ignoreLength { if n := len(bytes); n < bytesLen { panic(fmt.Sprintf("bytes only has space for %d bytes but need space for %d bytes (length = %d) for extension header with id = %d", n, bytesLen, length, i.nextHdrIdentifier)) } @@ -735,6 +761,36 @@ type IPv6SerializableExtHdr interface { serializeInto(nextHeader uint8, b []byte) int } +// Experiment extension header related constants, as defined in RFC 4727 +// section 3.3. 
+const ( + IPv6ExperimentHdrLength = 8 + ipv6ExperimentNextHeaderOffset = 0 + ipv6ExperimentLengthOffset = 1 + ipv6ExperimentHdrValueOffset = 2 + ipv6ExperimentHdrTagLength = 2 +) + +var _ IPv6SerializableExtHdr = (*IPv6ExperimentExtHdr)(nil) + +// identifier implements IPv6SerializableExtHdr. +func (h IPv6ExperimentExtHdr) identifier() IPv6ExtensionHeaderIdentifier { + return IPv6ExperimentExtHdrIdentifier +} + +// length implements IPv6SerializableExtHdr. +func (h IPv6ExperimentExtHdr) length() int { + return IPv6ExperimentHdrLength +} + +// serializeInto implements IPv6SerializableExtHdr. +func (h IPv6ExperimentExtHdr) serializeInto(nextHeader uint8, b []byte) int { + b[ipv6ExperimentNextHeaderOffset] = nextHeader + b[ipv6ExperimentLengthOffset] = (IPv6ExperimentHdrLength / ipv6ExtHdrLenBytesPerUnit) - 1 + binary.BigEndian.PutUint16(b[ipv6ExperimentHdrValueOffset:][:ipv6ExperimentHdrTagLength], uint16(h.Value)) + return IPv6ExperimentHdrLength +} + var _ IPv6SerializableExtHdr = (*IPv6SerializableHopByHopExtHdr)(nil) // IPv6SerializableHopByHopExtHdr implements serialization of the Hop by Hop diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/header/tcp.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/header/tcp.go index fe41e8d4..22d6b264 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/header/tcp.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/header/tcp.go @@ -17,7 +17,6 @@ package header import ( "encoding/binary" - "github.com/google/btree" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/checksum" "gvisor.dev/gvisor/pkg/tcpip/seqnum" @@ -175,11 +174,6 @@ type SACKBlock struct { End seqnum.Value } -// Less returns true if r.Start < b.Start. -func (r SACKBlock) Less(b btree.Item) bool { - return r.Start.LessThan(b.(SACKBlock).Start) -} - // Contains returns true if b is completely contained in r. 
func (r SACKBlock) Contains(b SACKBlock) bool { return r.Start.LessThanEq(b.Start) && b.End.LessThanEq(r.End) @@ -219,9 +213,8 @@ const ( // TCPTotalHeaderMaximumSize is the maximum size of headers from all layers in // a TCP packet. It analogous to MAX_TCP_HEADER in Linux. // - // TODO(b/319936470): Investigate why this needs to be at least 140 bytes. In - // Linux this value is at least 160, but in theory we should be able to use - // 138. In practice anything less than 140 starts to break GSO on gVNIC + // Note: In Linux this value is at least 160, but in theory we should be able + // to use 138. In practice anything less than 140 starts to break GSO on gVNIC // hardware. TCPTotalHeaderMaximumSize = 160 diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/header/virtionet.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/header/virtionet.go index e6b0c71b..0958c68d 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/header/virtionet.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/header/virtionet.go @@ -87,8 +87,8 @@ func (v VirtioNetHeader) CSumOffset() uint16 { func (v VirtioNetHeader) Encode(f *VirtioNetHeaderFields) { v[flags] = uint8(f.Flags) v[gsoType] = uint8(f.GSOType) - binary.BigEndian.PutUint16(v[hdrLen:], f.HdrLen) - binary.BigEndian.PutUint16(v[gsoSize:], f.GSOSize) - binary.BigEndian.PutUint16(v[csumStart:], f.CSumStart) - binary.BigEndian.PutUint16(v[csumOffset:], f.CSumOffset) + binary.LittleEndian.PutUint16(v[hdrLen:], f.HdrLen) + binary.LittleEndian.PutUint16(v[gsoSize:], f.GSOSize) + binary.LittleEndian.PutUint16(v[csumStart:], f.CSumStart) + binary.LittleEndian.PutUint16(v[csumOffset:], f.CSumOffset) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/link/nested/nested.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/link/nested/nested.go index 66c95689..ea6ec9db 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/link/nested/nested.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/link/nested/nested.go @@ -176,3 +176,8 @@ func (e *Endpoint) Close() { func (e *Endpoint) 
SetOnCloseAction(action func()) { e.child.SetOnCloseAction(action) } + +// Child returns the child endpoint. +func (e *Endpoint) Child() stack.LinkEndpoint { + return e.child +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/link/sniffer/sniffer.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/link/sniffer/sniffer.go index 583e7d89..5f20743c 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/link/sniffer/sniffer.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/link/sniffer/sniffer.go @@ -39,11 +39,6 @@ import ( // package. Valid values are 0 or 1. var LogPackets atomicbitops.Uint32 = atomicbitops.FromUint32(1) -// LogPacketsToPCAP is a flag used to enable or disable logging packets to a -// pcap writer. Valid values are 0 or 1. A writer must have been specified when the -// sniffer was created for this flag to have effect. -var LogPacketsToPCAP atomicbitops.Uint32 = atomicbitops.FromUint32(1) - // Endpoint is used to sniff and log network traffic. // // +stateify savable @@ -155,11 +150,10 @@ func (e *Endpoint) DeliverNetworkPacket(protocol tcpip.NetworkProtocolNumber, pk // DumpPacket logs a packet, depending on configuration, to stderr and/or a // pcap file. ts is an optional timestamp for the packet. 
func (e *Endpoint) DumpPacket(dir Direction, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer, ts *time.Time) { - writer := e.writer if LogPackets.Load() == 1 { LogPacket(e.logPrefix, dir, protocol, pkt) } - if writer != nil && LogPacketsToPCAP.Load() == 1 { + if e.writer != nil { packet := pcapPacket{ packet: pkt, maxCaptureLen: int(e.maxPCAPLen), @@ -173,7 +167,7 @@ func (e *Endpoint) DumpPacket(dir Direction, protocol tcpip.NetworkProtocolNumbe if err != nil { panic(err) } - if _, err := writer.Write(b); err != nil { + if _, err := e.writer.Write(b); err != nil { panic(err) } } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/arp/arp.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/arp/arp.go index e05f1889..f416fe82 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/arp/arp.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/arp/arp.go @@ -128,6 +128,11 @@ func (e *endpoint) MTU() uint32 { return lmtu - uint32(e.MaxHeaderLength()) } +// EndpointHeaderSize returns the size necessary for the ARP header. 
+func (e *endpoint) EndpointHeaderSize() uint32 { + return header.ARPSize +} + func (e *endpoint) MaxHeaderLength() uint16 { return e.nic.MaxHeaderLength() + header.ARPSize } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/fragmentation/reassembler.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/fragmentation/reassembler.go index 9aaad763..233f43a0 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/fragmentation/reassembler.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/fragmentation/reassembler.go @@ -135,7 +135,7 @@ func (r *reassembler) process(first, last uint16, more bool, proto uint8, pkt *s last: last, filled: true, final: currentHole.final, - pkt: pkt.IncRef(), + pkt: pkt.Clone(), } r.filled++ // For IPv6, it is possible to have different Protocol values between @@ -150,7 +150,7 @@ func (r *reassembler) process(first, last uint16, more bool, proto uint8, pkt *s if r.pkt != nil { r.pkt.DecRef() } - r.pkt = pkt.IncRef() + r.pkt = pkt.Clone() r.proto = proto } break diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/duplicate_address_detection.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/duplicate_address_detection.go index 66661f3c..cfde03a0 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/duplicate_address_detection.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/duplicate_address_detection.go @@ -39,7 +39,7 @@ type dadState struct { extendRequest extendRequest done *bool - timer tcpip.Timer + timer tcpip.Timer `state:"nosave"` completionHandlers []stack.DADCompletionHandler } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/generic_multicast_protocol.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/generic_multicast_protocol.go index 3e7ca67e..4a18f3d7 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/generic_multicast_protocol.go +++ 
b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/generic_multicast_protocol.go @@ -291,13 +291,13 @@ type GenericMulticastProtocolState struct { robustnessVariable uint8 queryInterval time.Duration mode protocolMode - modeTimer tcpip.Timer + modeTimer tcpip.Timer `state:"nosave"` - generalQueryV2Timer tcpip.Timer + generalQueryV2Timer tcpip.Timer `state:"nosave"` // TODO(b/341946753): Restore when netstack is savable. generalQueryV2TimerFiresAt time.Time `state:"nosave"` - stateChangedReportV2Timer tcpip.Timer + stateChangedReportV2Timer tcpip.Timer `state:"nosave"` stateChangedReportV2TimerSet bool } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/ip_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/ip_state_autogen.go index 96a7a77a..5e541c88 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/ip_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/ip_state_autogen.go @@ -17,7 +17,6 @@ func (d *dadState) StateFields() []string { "nonce", "extendRequest", "done", - "timer", "completionHandlers", } } @@ -30,8 +29,7 @@ func (d *dadState) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(0, &d.nonce) stateSinkObject.Save(1, &d.extendRequest) stateSinkObject.Save(2, &d.done) - stateSinkObject.Save(3, &d.timer) - stateSinkObject.Save(4, &d.completionHandlers) + stateSinkObject.Save(3, &d.completionHandlers) } func (d *dadState) afterLoad(context.Context) {} @@ -41,8 +39,7 @@ func (d *dadState) StateLoad(ctx context.Context, stateSourceObject state.Source stateSourceObject.Load(0, &d.nonce) stateSourceObject.Load(1, &d.extendRequest) stateSourceObject.Load(2, &d.done) - stateSourceObject.Load(3, &d.timer) - stateSourceObject.Load(4, &d.completionHandlers) + stateSourceObject.Load(3, &d.completionHandlers) } func (d *DADOptions) StateTypeName() string { @@ -237,9 +234,6 @@ func (g *GenericMulticastProtocolState) StateFields() []string { "robustnessVariable", 
"queryInterval", "mode", - "modeTimer", - "generalQueryV2Timer", - "stateChangedReportV2Timer", "stateChangedReportV2TimerSet", } } @@ -254,10 +248,7 @@ func (g *GenericMulticastProtocolState) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(2, &g.robustnessVariable) stateSinkObject.Save(3, &g.queryInterval) stateSinkObject.Save(4, &g.mode) - stateSinkObject.Save(5, &g.modeTimer) - stateSinkObject.Save(6, &g.generalQueryV2Timer) - stateSinkObject.Save(7, &g.stateChangedReportV2Timer) - stateSinkObject.Save(8, &g.stateChangedReportV2TimerSet) + stateSinkObject.Save(5, &g.stateChangedReportV2TimerSet) } func (g *GenericMulticastProtocolState) afterLoad(context.Context) {} @@ -269,10 +260,7 @@ func (g *GenericMulticastProtocolState) StateLoad(ctx context.Context, stateSour stateSourceObject.Load(2, &g.robustnessVariable) stateSourceObject.Load(3, &g.queryInterval) stateSourceObject.Load(4, &g.mode) - stateSourceObject.Load(5, &g.modeTimer) - stateSourceObject.Load(6, &g.generalQueryV2Timer) - stateSourceObject.Load(7, &g.stateChangedReportV2Timer) - stateSourceObject.Load(8, &g.stateChangedReportV2TimerSet) + stateSourceObject.Load(5, &g.stateChangedReportV2TimerSet) } func (m *MultiCounterIPForwardingStats) StateTypeName() string { @@ -294,6 +282,7 @@ func (m *MultiCounterIPForwardingStats) StateFields() []string { "NoMulticastPendingQueueBufferSpace", "OutgoingDeviceNoBufferSpace", "Errors", + "OutgoingDeviceClosedForSend", } } @@ -315,6 +304,7 @@ func (m *MultiCounterIPForwardingStats) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(10, &m.NoMulticastPendingQueueBufferSpace) stateSinkObject.Save(11, &m.OutgoingDeviceNoBufferSpace) stateSinkObject.Save(12, &m.Errors) + stateSinkObject.Save(13, &m.OutgoingDeviceClosedForSend) } func (m *MultiCounterIPForwardingStats) afterLoad(context.Context) {} @@ -334,6 +324,7 @@ func (m *MultiCounterIPForwardingStats) StateLoad(ctx context.Context, stateSour stateSourceObject.Load(10, 
&m.NoMulticastPendingQueueBufferSpace) stateSourceObject.Load(11, &m.OutgoingDeviceNoBufferSpace) stateSourceObject.Load(12, &m.Errors) + stateSourceObject.Load(13, &m.OutgoingDeviceClosedForSend) } func (m *MultiCounterIPStats) StateTypeName() string { diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/stats.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/stats.go index 85990f5d..53cd55ae 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/stats.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/ip/stats.go @@ -78,6 +78,10 @@ type MultiCounterIPForwardingStats struct { // Errors is the number of IP packets received which could not be // successfully forwarded. Errors tcpip.MultiCounterStat + + // OutgoingDeviceClosedForSend is the number of packets that were dropped due + // to the outgoing device being closed for send. + OutgoingDeviceClosedForSend tcpip.MultiCounterStat } // Init sets internal counters to track a and b counters. @@ -95,9 +99,10 @@ func (m *MultiCounterIPForwardingStats) Init(a, b *tcpip.IPForwardingStats) { m.UnknownOutputEndpoint.Init(a.UnknownOutputEndpoint, b.UnknownOutputEndpoint) m.NoMulticastPendingQueueBufferSpace.Init(a.NoMulticastPendingQueueBufferSpace, b.NoMulticastPendingQueueBufferSpace) m.OutgoingDeviceNoBufferSpace.Init(a.OutgoingDeviceNoBufferSpace, b.OutgoingDeviceNoBufferSpace) + m.OutgoingDeviceClosedForSend.Init(a.OutgoingDeviceClosedForSend, b.OutgoingDeviceClosedForSend) } -// LINT.ThenChange(:MultiCounterIPForwardingStats, ../../../tcpip.go:IPForwardingStats) +// LINT.ThenChange(../../../tcpip.go:IPForwardingStats) // LINT.IfChange(MultiCounterIPStats) @@ -211,4 +216,4 @@ func (m *MultiCounterIPStats) Init(a, b *tcpip.IPStats) { m.Forwarding.Init(&a.Forwarding, &b.Forwarding) } -// LINT.ThenChange(:MultiCounterIPStats, ../../../tcpip.go:IPStats) +// LINT.ThenChange(../../../tcpip.go:IPStats) diff --git 
a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast/multicast_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast/multicast_state_autogen.go index ecf8fc26..e26e0973 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast/multicast_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast/multicast_state_autogen.go @@ -16,7 +16,6 @@ func (r *RouteTable) StateFields() []string { return []string{ "installedRoutes", "pendingRoutes", - "cleanupPendingRoutesTimer", "isCleanupRoutineRunning", "config", } @@ -29,9 +28,8 @@ func (r *RouteTable) StateSave(stateSinkObject state.Sink) { r.beforeSave() stateSinkObject.Save(0, &r.installedRoutes) stateSinkObject.Save(1, &r.pendingRoutes) - stateSinkObject.Save(2, &r.cleanupPendingRoutesTimer) - stateSinkObject.Save(3, &r.isCleanupRoutineRunning) - stateSinkObject.Save(4, &r.config) + stateSinkObject.Save(2, &r.isCleanupRoutineRunning) + stateSinkObject.Save(3, &r.config) } func (r *RouteTable) afterLoad(context.Context) {} @@ -40,9 +38,8 @@ func (r *RouteTable) afterLoad(context.Context) {} func (r *RouteTable) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(0, &r.installedRoutes) stateSourceObject.Load(1, &r.pendingRoutes) - stateSourceObject.Load(2, &r.cleanupPendingRoutesTimer) - stateSourceObject.Load(3, &r.isCleanupRoutineRunning) - stateSourceObject.Load(4, &r.config) + stateSourceObject.Load(2, &r.isCleanupRoutineRunning) + stateSourceObject.Load(3, &r.config) } func (r *InstalledRoute) StateTypeName() string { diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast/route_table.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast/route_table.go index d74aa31f..10f895bd 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast/route_table.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast/route_table.go @@ -57,7 +57,7 @@ type 
RouteTable struct { // cleanupPendingRoutesTimer is a timer that triggers a routine to remove // pending routes that are expired. // +checklocks:pendingMu - cleanupPendingRoutesTimer tcpip.Timer + cleanupPendingRoutesTimer tcpip.Timer `state:"nosave"` // +checklocks:pendingMu isCleanupRoutineRunning bool diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv4/icmp.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv4/icmp.go index 8e96ca80..980fade1 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv4/icmp.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv4/icmp.go @@ -16,6 +16,7 @@ package ipv4 import ( "fmt" + "math" "gvisor.dev/gvisor/pkg/buffer" "gvisor.dev/gvisor/pkg/tcpip" @@ -356,12 +357,20 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) { replyData := stack.PayloadSince(pkt.TransportHeader()) defer replyData.Release() ipHdr := header.IPv4(pkt.NetworkHeader().Slice()) + localAddressTemporary := pkt.NetworkPacketInfo.LocalAddressTemporary localAddressBroadcast := pkt.NetworkPacketInfo.LocalAddressBroadcast - // It's possible that a raw socket expects to receive this. + // It's possible that a raw socket or custom defaultHandler expects to + // receive this packet. e.dispatcher.DeliverTransportPacket(header.ICMPv4ProtocolNumber, pkt) pkt = nil + // Skip direct ICMP echo reply if the packet was received with a temporary + // address, allowing custom handlers to take over. + if localAddressTemporary { + return + } + sent := e.stats.icmp.packetsSent if !e.protocol.allowICMPReply(header.ICMPv4EchoReply, header.ICMPv4UnusedCode) { sent.rateLimited.Increment() @@ -591,7 +600,12 @@ func (*icmpReasonNetworkUnreachable) isICMPReason() {} // icmpReasonFragmentationNeeded is an error where a packet requires // fragmentation while also having the Don't Fragment flag set, as per RFC 792 // page 3, Destination Unreachable Message. 
-type icmpReasonFragmentationNeeded struct{} +type icmpReasonFragmentationNeeded struct { + // mtu is the MTU of the next-hop link. Per RFC 1191 §4, this value + // must be included in the ICMP Fragmentation Needed message so the + // sender can update its path MTU cache. + mtu uint32 +} func (*icmpReasonFragmentationNeeded) isICMPReason() {} @@ -696,30 +710,36 @@ func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer, deliv } sent := netEP.stats.icmp.packetsSent - icmpType, icmpCode, counter, pointer := func() (header.ICMPv4Type, header.ICMPv4Code, tcpip.MultiCounterStat, byte) { + icmpType, icmpCode, counter, pointer, nextHopMTU := func() (header.ICMPv4Type, header.ICMPv4Code, tcpip.MultiCounterStat, byte, uint16) { switch reason := reason.(type) { case *icmpReasonNetworkProhibited: - return header.ICMPv4DstUnreachable, header.ICMPv4NetProhibited, sent.dstUnreachable, 0 + return header.ICMPv4DstUnreachable, header.ICMPv4NetProhibited, sent.dstUnreachable, 0, 0 case *icmpReasonHostProhibited: - return header.ICMPv4DstUnreachable, header.ICMPv4HostProhibited, sent.dstUnreachable, 0 + return header.ICMPv4DstUnreachable, header.ICMPv4HostProhibited, sent.dstUnreachable, 0, 0 case *icmpReasonAdministrativelyProhibited: - return header.ICMPv4DstUnreachable, header.ICMPv4AdminProhibited, sent.dstUnreachable, 0 + return header.ICMPv4DstUnreachable, header.ICMPv4AdminProhibited, sent.dstUnreachable, 0, 0 case *icmpReasonPortUnreachable: - return header.ICMPv4DstUnreachable, header.ICMPv4PortUnreachable, sent.dstUnreachable, 0 + return header.ICMPv4DstUnreachable, header.ICMPv4PortUnreachable, sent.dstUnreachable, 0, 0 case *icmpReasonProtoUnreachable: - return header.ICMPv4DstUnreachable, header.ICMPv4ProtoUnreachable, sent.dstUnreachable, 0 + return header.ICMPv4DstUnreachable, header.ICMPv4ProtoUnreachable, sent.dstUnreachable, 0, 0 case *icmpReasonNetworkUnreachable: - return header.ICMPv4DstUnreachable, header.ICMPv4NetUnreachable, 
sent.dstUnreachable, 0 + return header.ICMPv4DstUnreachable, header.ICMPv4NetUnreachable, sent.dstUnreachable, 0, 0 case *icmpReasonHostUnreachable: - return header.ICMPv4DstUnreachable, header.ICMPv4HostUnreachable, sent.dstUnreachable, 0 + return header.ICMPv4DstUnreachable, header.ICMPv4HostUnreachable, sent.dstUnreachable, 0, 0 case *icmpReasonFragmentationNeeded: - return header.ICMPv4DstUnreachable, header.ICMPv4FragmentationNeeded, sent.dstUnreachable, 0 + // Per RFC 1191 §4, include the next-hop MTU in the ICMP message. + // Cap at MaxUint16 since the field is 16 bits wide. + mtu := reason.mtu + if mtu > math.MaxUint16 { + mtu = math.MaxUint16 + } + return header.ICMPv4DstUnreachable, header.ICMPv4FragmentationNeeded, sent.dstUnreachable, 0, uint16(mtu) case *icmpReasonTTLExceeded: - return header.ICMPv4TimeExceeded, header.ICMPv4TTLExceeded, sent.timeExceeded, 0 + return header.ICMPv4TimeExceeded, header.ICMPv4TTLExceeded, sent.timeExceeded, 0, 0 case *icmpReasonReassemblyTimeout: - return header.ICMPv4TimeExceeded, header.ICMPv4ReassemblyTimeout, sent.timeExceeded, 0 + return header.ICMPv4TimeExceeded, header.ICMPv4ReassemblyTimeout, sent.timeExceeded, 0, 0 case *icmpReasonParamProblem: - return header.ICMPv4ParamProblem, header.ICMPv4UnusedCode, sent.paramProblem, reason.pointer + return header.ICMPv4ParamProblem, header.ICMPv4UnusedCode, sent.paramProblem, reason.pointer, 0 default: panic(fmt.Sprintf("unsupported ICMP type %T", reason)) } @@ -788,6 +808,7 @@ func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer, deliv icmpHdr.SetCode(icmpCode) icmpHdr.SetType(icmpType) icmpHdr.SetPointer(pointer) + icmpHdr.SetMTU(nextHopMTU) icmpHdr.SetChecksum(header.ICMPv4Checksum(icmpHdr, icmpPkt.Data().Checksum())) if err := route.WritePacket( diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv4/ipv4.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv4/ipv4.go index e2721a4d..714cf980 100644 --- 
a/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv4/ipv4.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/network/ipv4/ipv4.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/atomicbitops" "gvisor.dev/gvisor/pkg/buffer" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" @@ -69,6 +70,8 @@ const ( forwardingEnabled = 1 ) +var martianPacketLogger = log.BasicRateLimitedLogger(time.Minute) + var ipv4BroadcastAddr = header.IPv4Broadcast.WithPrefix() var _ stack.LinkResolvableNetworkEndpoint = (*endpoint)(nil) @@ -79,6 +82,7 @@ var _ stack.AddressableEndpoint = (*endpoint)(nil) var _ stack.NetworkEndpoint = (*endpoint)(nil) var _ IGMPEndpoint = (*endpoint)(nil) +// +checklocksalias:igmp.ep.mu=mu // +stateify savable type endpoint struct { nic stack.NetworkInterface @@ -123,13 +127,11 @@ func (e *endpoint) GetIGMPVersion() IGMPVersion { } // +checklocks:e.mu -// +checklocksalias:e.igmp.ep.mu=e.mu func (e *endpoint) setIGMPVersionLocked(v IGMPVersion) IGMPVersion { return e.igmp.setVersion(v) } // +checklocksread:e.mu -// +checklocksalias:e.igmp.ep.mu=e.mu func (e *endpoint) getIGMPVersionLocked() IGMPVersion { return e.igmp.getVersion() } @@ -288,7 +290,6 @@ func (e *endpoint) Enable() tcpip.Error { } // +checklocks:e.mu -// +checklocksalias:e.igmp.ep.mu=e.mu func (e *endpoint) enableLocked() tcpip.Error { // If the NIC is not enabled, the endpoint can't do anything meaningful so // don't enable the endpoint. @@ -359,7 +360,6 @@ func (e *endpoint) Disable() { } // +checklocks:e.mu -// +checklocksalias:e.igmp.ep.mu=e.mu func (e *endpoint) disableLocked() { if !e.isEnabled() { return @@ -423,6 +423,11 @@ func (e *endpoint) MTU() uint32 { return networkMTU } +// EndpointHeaderSize returns the size necessary for the IPv4 header. 
+func (e *endpoint) EndpointHeaderSize() uint32 { + return header.IPv4MinimumSize +} + // MaxHeaderLength returns the maximum length needed by ipv4 headers (and // underlying protocols). func (e *endpoint) MaxHeaderLength() uint16 { @@ -446,6 +451,9 @@ func (e *endpoint) getID() uint16 { } func (e *endpoint) addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, options header.IPv4OptionsSerializer) tcpip.Error { + if expVal := params.ExperimentOptionValue; expVal != 0 { + options = append(options, &header.IPv4SerializableExperimentOption{Tag: expVal}) + } hdrLen := header.IPv4MinimumSize var optLen int if options != nil { @@ -523,16 +531,23 @@ func (e *endpoint) WritePacket(r *stack.Route, params stack.NetworkHeaderParams, func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error { netHeader := header.IPv4(pkt.NetworkHeader().Slice()) dstAddr := netHeader.DestinationAddress() + stk := e.protocol.stack - // iptables filtering. All packets that reach here are locally - // generated. - outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) - if ok := e.protocol.stack.IPTables().CheckOutput(pkt, r, outNicName); !ok { + // iptables filtering. All packets that reach here are locally generated. + outNicName := stk.FindNICNameFromID(e.nic.ID()) + if ok := stk.IPTables().CheckOutput(pkt, r, outNicName); !ok { // iptables is telling us to drop the packet. e.stats.ip.IPTablesOutputDropped.Increment() return nil } + if nft := stk.NFTables(); nft != nil && stk.IsNFTablesConfigured() { + if !nft.CheckOutput(pkt, stack.IP) { + // nftables is telling us to drop the packet. + return nil + } + } + // If the packet is manipulated as per DNAT Output rules, handle packet // based on destination address and do not send the packet to link // layer. 
@@ -563,15 +578,23 @@ func (e *endpoint) writePacketPostRouting(r *stack.Route, pkt *stack.PacketBuffe return nil } + stk := e.protocol.stack // Postrouting NAT can only change the source address, and does not alter the // route or outgoing interface of the packet. - outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) - if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, e, outNicName); !ok { + outNicName := stk.FindNICNameFromID(e.nic.ID()) + if ok := stk.IPTables().CheckPostrouting(pkt, r, e, outNicName); !ok { // iptables is telling us to drop the packet. e.stats.ip.IPTablesPostroutingDropped.Increment() return nil } + if nft := stk.NFTables(); nft != nil && stk.IsNFTablesConfigured() { + if !nft.CheckPostrouting(pkt, stack.IP) { + // nftables is telling us to drop the packet. + return nil + } + } + stats := e.stats.ip networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(len(pkt.NetworkHeader().Slice()))) @@ -682,6 +705,13 @@ func (e *endpoint) forwardPacketWithRoute(route *stack.Route, pkt *stack.PacketB return nil } + if nft := stk.NFTables(); nft != nil && stk.IsNFTablesConfigured() { + if !nft.CheckForward(pkt, stack.IP) { + // nftables is telling us to drop the packet. + return nil + } + } + // We need to do a deep copy of the IP packet because // WriteHeaderIncludedPacket may modify the packet buffer, but we do // not own it. @@ -730,7 +760,9 @@ func (e *endpoint) forwardPacketWithRoute(route *stack.Route, pkt *stack.PacketB // WriteHeaderIncludedPacket checks for the presence of the Don't Fragment bit // while sending the packet and returns this error iff fragmentation is // necessary and the bit is also set. 
- _ = e.protocol.returnError(&icmpReasonFragmentationNeeded{}, pkt, false /* deliveredLocally */) + _ = e.protocol.returnError(&icmpReasonFragmentationNeeded{ + mtu: forwardToEp.nic.MTU(), + }, pkt, false /* deliveredLocally */) return &ip.ErrMessageTooLong{} case *tcpip.ErrNoBufferSpace: return &ip.ErrOutgoingDeviceNoBufferSpace{} @@ -782,6 +814,13 @@ func (e *endpoint) forwardUnicastPacket(pkt *stack.PacketBuffer) ip.ForwardingEr return nil } + if nft := stk.NFTables(); nft != nil && stk.IsNFTablesConfigured() { + if !nft.CheckForward(pkt, stack.IP) { + // nftables is telling us to drop the packet. + return nil + } + } + // The packet originally arrived on e so provide its NIC as the input NIC. ep.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) return nil @@ -790,9 +829,7 @@ func (e *endpoint) forwardUnicastPacket(pkt *stack.PacketBuffer) ip.ForwardingEr r, err := stk.FindRoute(0, tcpip.Address{}, dstAddr, ProtocolNumber, false /* multicastLoop */) switch err.(type) { case nil: - // TODO(https://gvisor.dev/issues/8105): We should not observe ErrHostUnreachable from route - // lookups. - case *tcpip.ErrHostUnreachable, *tcpip.ErrNetworkUnreachable: + case *tcpip.ErrNetworkUnreachable: // We return the original error rather than the result of returning // the ICMP packet because the original error is more relevant to // the caller. @@ -839,17 +876,20 @@ func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) { if !e.nic.IsLoopback() { if !e.protocol.options.AllowExternalLoopbackTraffic { if header.IsV4LoopbackAddress(h.SourceAddress()) { + martianPacketLogger.Infof("Martian packet dropped with loopback source address. If your traffic is unexpectedly dropped, you may want to allow martian packets.") stats.InvalidSourceAddressesReceived.Increment() return } if header.IsV4LoopbackAddress(h.DestinationAddress()) { + martianPacketLogger.Infof("Martian packet dropped with loopback destination address. 
If your traffic is unexpectedly dropped, you may want to allow martian packets.") stats.InvalidDestinationAddressesReceived.Increment() return } } - if e.protocol.stack.HandleLocal() { + stk := e.protocol.stack + if stk.HandleLocal() { addressEndpoint := e.AcquireAssignedAddress(header.IPv4(pkt.NetworkHeader().Slice()).SourceAddress(), e.nic.Promiscuous(), stack.CanBePrimaryEndpoint, true /* readOnly */) if addressEndpoint != nil { // The source address is one of our own, so we never should have gotten @@ -861,14 +901,23 @@ func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) { } // Loopback traffic skips the prerouting chain. - inNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) - if ok := e.protocol.stack.IPTables().CheckPrerouting(pkt, e, inNicName); !ok { + inNicName := stk.FindNICNameFromID(e.nic.ID()) + if ok := stk.IPTables().CheckPrerouting(pkt, e, inNicName); !ok { // iptables is telling us to drop the packet. stats.IPTablesPreroutingDropped.Increment() return } - } + if nft := stk.NFTables(); nft != nil && stk.IsNFTablesConfigured() { + if !nft.CheckPrerouting(pkt, stack.IP) { + // nftables is telling us to drop the packet. + return + } + } + } + // CheckPrerouting can modify the backing storage of the packet, so refresh + // the header. + h = header.IPv4(pkt.NetworkHeader().Slice()) e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) } @@ -924,22 +973,6 @@ func validateAddressesForForwarding(h header.IPv4) ip.ForwardingError { return &ip.ErrInitializingSourceAddress{} } - // As per RFC 3927 section 7, - // - // A router MUST NOT forward a packet with an IPv4 Link-Local source or - // destination address, irrespective of the router's default route - // configuration or routes obtained from dynamic routing protocols. - // - // A router which receives a packet with an IPv4 Link-Local source or - // destination address MUST NOT forward the packet. 
This prevents - // forwarding of packets back onto the network segment from which they - // originated, or to any other segment. - if header.IsV4LinkLocalUnicastAddress(srcAddr) { - return &ip.ErrLinkLocalSourceAddress{} - } - if dstAddr := h.DestinationAddress(); header.IsV4LinkLocalUnicastAddress(dstAddr) || header.IsV4LinkLocalMulticastAddress(dstAddr) { - return &ip.ErrLinkLocalDestinationAddress{} - } return nil } @@ -1159,6 +1192,7 @@ func (e *endpoint) handleValidatedPacket(h header.IPv4, pkt *stack.PacketBuffer, // If the packet is destined for this device, then it should be delivered // locally. Otherwise, if forwarding is enabled, it should be forwarded. if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint, true /* readOnly */); addressEndpoint != nil { + pkt.NetworkPacketInfo.LocalAddressTemporary = addressEndpoint.Temporary() subnet := addressEndpoint.AddressWithPrefix().Subnet() pkt.NetworkPacketInfo.LocalAddressBroadcast = subnet.IsBroadcast(dstAddr) || dstAddr == header.IPv4Broadcast e.deliverPacketLocally(h, pkt, inNICName) @@ -1198,6 +1232,13 @@ func (e *endpoint) handleForwardingError(err ip.ForwardingError) { stats.Forwarding.UnknownOutputEndpoint.Increment() case *ip.ErrOutgoingDeviceNoBufferSpace: stats.Forwarding.OutgoingDeviceNoBufferSpace.Increment() + case *ip.ErrOther: + switch err := err.Err.(type) { + case *tcpip.ErrClosedForSend: + stats.Forwarding.OutgoingDeviceClosedForSend.Increment() + default: + panic(fmt.Sprintf("unrecognized tcpip forwarding error: %s", err)) + } default: panic(fmt.Sprintf("unrecognized forwarding error: %s", err)) } @@ -1206,14 +1247,22 @@ func (e *endpoint) handleForwardingError(err ip.ForwardingError) { func (e *endpoint) deliverPacketLocally(h header.IPv4, pkt *stack.PacketBuffer, inNICName string) { stats := e.stats + stk := e.protocol.stack // iptables filtering. All packets that reach here are intended for // this machine and will not be forwarded. 
- if ok := e.protocol.stack.IPTables().CheckInput(pkt, inNICName); !ok { + if ok := stk.IPTables().CheckInput(pkt, inNICName); !ok { // iptables is telling us to drop the packet. stats.ip.IPTablesInputDropped.Increment() return } + if nft := stk.NFTables(); nft != nil && stk.IsNFTablesConfigured() { + if !nft.CheckInput(pkt, stack.IP) { + // nftables is telling us to drop the packet. + return + } + } + if h.More() || h.FragmentOffset() != 0 { if pkt.Data().Size()+len(pkt.TransportHeader().Slice()) == 0 { // Drop the packet as it's marked as a fragment but has @@ -1325,7 +1374,7 @@ func (e *endpoint) deliverPacketLocally(h header.IPv4, pkt *stack.PacketBuffer, } if p == header.IGMPProtocolNumber { e.mu.Lock() - e.igmp.handleIGMP(pkt, hasRouterAlertOption) // +checklocksforce: e == e.igmp.ep. + e.igmp.handleIGMP(pkt, hasRouterAlertOption) e.mu.Unlock() return } @@ -1375,7 +1424,6 @@ func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, p // sendQueuedReports sends queued igmp reports. // // +checklocks:e.mu -// +checklocksalias:e.igmp.ep.mu=e.mu func (e *endpoint) sendQueuedReports() { e.igmp.sendQueuedReports() } @@ -1461,7 +1509,6 @@ func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error { // joinGroupLocked is like JoinGroup but with locking requirements. // // +checklocks:e.mu -// +checklocksalias:e.igmp.ep.mu=e.mu func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error { if !header.IsV4MulticastAddress(addr) { return &tcpip.ErrBadAddress{} @@ -1481,7 +1528,6 @@ func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error { // leaveGroupLocked is like LeaveGroup but with locking requirements. 
// // +checklocks:e.mu -// +checklocksalias:e.igmp.ep.mu=e.mu func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error { return e.igmp.leaveGroup(addr) } @@ -1490,7 +1536,7 @@ func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error { func (e *endpoint) IsInGroup(addr tcpip.Address) bool { e.mu.RLock() defer e.mu.RUnlock() - return e.igmp.isInGroup(addr) // +checklocksforce: e.mu==e.igmp.ep.mu. + return e.igmp.isInGroup(addr) } // Stats implements stack.NetworkEndpoint. @@ -1598,11 +1644,11 @@ func (p *protocol) Close() { func (*protocol) Wait() {} func (p *protocol) validateUnicastSourceAndMulticastDestination(addresses stack.UnicastSourceAndMulticastDestination) tcpip.Error { - if !p.isUnicastAddress(addresses.Source) || header.IsV4LinkLocalUnicastAddress(addresses.Source) { + if !p.isUnicastAddress(addresses.Source) { return &tcpip.ErrBadAddress{} } - if !header.IsV4MulticastAddress(addresses.Destination) || header.IsV4LinkLocalMulticastAddress(addresses.Destination) { + if !header.IsV4MulticastAddress(addresses.Destination) { return &tcpip.ErrBadAddress{} } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/socketops.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/socketops.go index b8196912..668cd495 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/socketops.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/socketops.go @@ -263,6 +263,10 @@ type SocketOptions struct { // rcvlowat specifies the minimum number of bytes which should be // received to indicate the socket as readable. rcvlowat atomicbitops.Int32 + + // experimentOptionValue is the value set for the IP option experiment header + // if it is not zero. + experimentOptionValue atomicbitops.Uint32 } // InitHandler initializes the handler. This must be called before using the @@ -539,6 +543,17 @@ func (so *SocketOptions) SetLinger(linger LingerOption) { so.mu.Unlock() } +// GetExperimentOptionValue gets value for the experiment IP option header. 
+func (so *SocketOptions) GetExperimentOptionValue() uint16 { + v := so.experimentOptionValue.Load() + return uint16(v) +} + +// SetExperimentOptionValue sets the value for the experiment IP option header. +func (so *SocketOptions) SetExperimentOptionValue(v uint16) { + so.experimentOptionValue.Store(uint32(v)) +} + // SockErrOrigin represents the constants for error origin. type SockErrOrigin uint8 diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/address_state_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/address_state_mutex.go index 8373da7e..5a95cbb1 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/address_state_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/address_state_mutex.go @@ -92,5 +92,5 @@ func addressStateinitLockNames() {} func init() { addressStateinitLockNames() - addressStateprefixIndex = locking.NewMutexClass(reflect.TypeOf(addressStateRWMutex{}), addressStatelockNames) + addressStateprefixIndex = locking.NewMutexClass(reflect.TypeFor[addressStateRWMutex](), addressStatelockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/addressable_endpoint_state.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/addressable_endpoint_state.go index bb2e0faf..cf37edb5 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/addressable_endpoint_state.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/addressable_endpoint_state.go @@ -602,7 +602,7 @@ func (a *AddressableEndpointState) AcquireAssignedAddressOrMatching(localAddr tc // Proceed to add a new temporary endpoint. 
addr := localAddr.WithPrefix() - ep, err := a.addAndAcquireAddressLocked(addr, AddressProperties{PEB: tempPEB}, Temporary) + ep, err := a.addAndAcquireAddressLocked(addr, AddressProperties{PEB: tempPEB, Temporary: true}, Temporary) if err != nil { // addAndAcquireAddressLocked only returns an error if the address is // already assigned but we just checked above if the address exists so we @@ -738,8 +738,6 @@ func (a *AddressableEndpointState) Cleanup() { var _ AddressEndpoint = (*addressState)(nil) // addressState holds state for an address. -// -// +stateify savable type addressState struct { addressableEndpointState *AddressableEndpointState addr tcpip.AddressWithPrefix @@ -750,7 +748,7 @@ type addressState struct { // // AddressableEndpointState.mu // addressState.mu - mu addressStateRWMutex `state:"nosave"` + mu addressStateRWMutex refs addressStateRefs // checklocks:mu kind AddressKind diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/addressable_endpoint_state_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/addressable_endpoint_state_mutex.go index 56ea53e3..7a15cd3b 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/addressable_endpoint_state_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/addressable_endpoint_state_mutex.go @@ -92,5 +92,5 @@ func addressableEndpointStateinitLockNames() {} func init() { addressableEndpointStateinitLockNames() - addressableEndpointStateprefixIndex = locking.NewMutexClass(reflect.TypeOf(addressableEndpointStateRWMutex{}), addressableEndpointStatelockNames) + addressableEndpointStateprefixIndex = locking.NewMutexClass(reflect.TypeFor[addressableEndpointStateRWMutex](), addressableEndpointStatelockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bridge.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bridge.go index 50c8f964..fa7fd56c 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bridge.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bridge.go @@ -28,6 +28,22 @@ type bridgePort struct { 
nic *nic } +// BridgeFDBKey is the MAC address of a device which a bridge port is associated with. +type BridgeFDBKey tcpip.LinkAddress + +// BridgeFDBEntry consists of all metadata for a FDB record. +type BridgeFDBEntry struct { + port *bridgePort +} + +// PortLinkAddress returns the mac address of the device that is bound to the bridge port. +func (e BridgeFDBEntry) PortLinkAddress() tcpip.LinkAddress { + if e.port == nil { + return "" + } + return e.port.nic.LinkAddress() +} + // ParseHeader implements stack.LinkEndpoint. func (p *bridgePort) ParseHeader(pkt *PacketBuffer) bool { _, ok := pkt.LinkHeader().Consume(header.EthernetMinimumSize) @@ -37,23 +53,49 @@ func (p *bridgePort) ParseHeader(pkt *PacketBuffer) bool { // DeliverNetworkPacket implements stack.NetworkDispatcher. func (p *bridgePort) DeliverNetworkPacket(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) { bridge := p.bridge + eth := header.Ethernet(pkt.LinkHeader().Slice()) + updateFDB := false bridge.mu.RLock() - - // Send the packet to all other ports. - for _, port := range bridge.ports { - if p == port { - continue + // Add an entry at the bridge FDB, it maps a MAC address + // to a bridge port where the traffic is received when + // the MAC address is not multicast. + // Network packets that are sent to the learned MAC address + // will be forwarded to the bridge port that is stored in + // the FDB table. + sourceAddress := eth.SourceAddress() + if _, hasSourceFDB := bridge.fdbTable[BridgeFDBKey(sourceAddress)]; !header.IsMulticastEthernetAddress(sourceAddress) && !hasSourceFDB { + updateFDB = true + } + if entry, exist := bridge.fdbTable[BridgeFDBKey(eth.DestinationAddress())]; !exist { + // When no FDB entry is found, send the packet to all ports. 
+ for _, port := range bridge.ports { + if p == port { + continue + } + newPkt := NewPacketBuffer(PacketBufferOptions{ + ReserveHeaderBytes: int(port.nic.MaxHeaderLength()), + Payload: pkt.ToBuffer(), + }) + port.nic.writeRawPacket(newPkt) + newPkt.DecRef() } + } else if entry.port != p { + destPort := entry.port newPkt := NewPacketBuffer(PacketBufferOptions{ - ReserveHeaderBytes: int(port.nic.MaxHeaderLength()), + ReserveHeaderBytes: int(destPort.nic.MaxHeaderLength()), Payload: pkt.ToBuffer(), }) - port.nic.writeRawPacket(newPkt) + destPort.nic.writeRawPacket(newPkt) newPkt.DecRef() } d := bridge.dispatcher bridge.mu.RUnlock() + if updateFDB { + bridge.mu.Lock() + bridge.addFDBEntryLocked(eth.SourceAddress(), p, 0) + bridge.mu.Unlock() + } if d != nil { // The dispatcher may acquire Stack.mu in DeliverNetworkPacket(), which is // ordered above bridge.mu. So call DeliverNetworkPacket() without holding @@ -72,6 +114,7 @@ func NewBridgeEndpoint(mtu uint32) *BridgeEndpoint { addr: tcpip.GetRandMacAddr(), } b.ports = make(map[tcpip.NICID]*bridgePort) + b.fdbTable = make(map[BridgeFDBKey]BridgeFDBEntry) return b } @@ -89,7 +132,9 @@ type BridgeEndpoint struct { // +checklocks:mu attached bool // +checklocks:mu - mtu uint32 + mtu uint32 + // +checklocks:mu + fdbTable map[BridgeFDBKey]BridgeFDBEntry maxHeaderLength atomicbitops.Uint32 } @@ -143,6 +188,12 @@ func (b *BridgeEndpoint) DelNIC(nic *nic) tcpip.Error { b.mu.Lock() defer b.mu.Unlock() + port := b.ports[nic.id] + for k, e := range b.fdbTable { + if e.port == port { + delete(b.fdbTable, k) + } + } delete(b.ports, nic.id) nic.NetworkLinkEndpoint.Attach(nic) return nil @@ -172,8 +223,8 @@ func (b *BridgeEndpoint) MaxHeaderLength() uint16 { // LinkAddress implements stack.LinkEndpoint.LinkAddress. 
func (b *BridgeEndpoint) LinkAddress() tcpip.LinkAddress { - b.mu.Lock() - defer b.mu.Unlock() + b.mu.RLock() + defer b.mu.RUnlock() return b.addr } @@ -198,6 +249,7 @@ func (b *BridgeEndpoint) Attach(dispatcher NetworkDispatcher) { } b.dispatcher = dispatcher b.ports = make(map[tcpip.NICID]*bridgePort) + b.fdbTable = make(map[BridgeFDBKey]BridgeFDBEntry) } // IsAttached implements stack.LinkEndpoint.IsAttached. @@ -230,3 +282,25 @@ func (b *BridgeEndpoint) Close() {} // SetOnCloseAction implements stack.LinkEndpoint.Close. func (b *BridgeEndpoint) SetOnCloseAction(func()) {} + +// Add a new FDBEntry by learning. The learning happens when a packet +// is received by a bridge port, the bridge will use the port for the future +// deliveries to the NIC device. +// The addr is the key when it looks for the entry. +// +// +checklocks:b.mu +func (b *BridgeEndpoint) addFDBEntryLocked(addr tcpip.LinkAddress, source *bridgePort, flags uint64) bool { + // TODO(b/376924093): limit bridge FDB size. + b.fdbTable[BridgeFDBKey(addr)] = BridgeFDBEntry{ + port: source, + } + return true +} + +// FindFDBEntry find the FDB entry for the given address. If it doesn't exist, +// it will return an empty entry. 
+func (b *BridgeEndpoint) FindFDBEntry(addr tcpip.LinkAddress) BridgeFDBEntry { + b.mu.RLock() + defer b.mu.RUnlock() + return b.fdbTable[BridgeFDBKey(addr)] +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bridge_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bridge_mutex.go index 33d66936..a194f5e8 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bridge_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bridge_mutex.go @@ -92,5 +92,5 @@ func bridgeinitLockNames() {} func init() { bridgeinitLockNames() - bridgeprefixIndex = locking.NewMutexClass(reflect.TypeOf(bridgeRWMutex{}), bridgelockNames) + bridgeprefixIndex = locking.NewMutexClass(reflect.TypeFor[bridgeRWMutex](), bridgelockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bucket_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bucket_mutex.go index 3cee9c82..d9e3f2ff 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bucket_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/bucket_mutex.go @@ -94,5 +94,5 @@ func bucketinitLockNames() { bucketlockNames = []string{"otherTuple"} } func init() { bucketinitLockNames() - bucketprefixIndex = locking.NewMutexClass(reflect.TypeOf(bucketRWMutex{}), bucketlockNames) + bucketprefixIndex = locking.NewMutexClass(reflect.TypeFor[bucketRWMutex](), bucketlockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/cleanup_endpoints_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/cleanup_endpoints_mutex.go index 0516e7b0..52ef1a6c 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/cleanup_endpoints_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/cleanup_endpoints_mutex.go @@ -60,5 +60,5 @@ func cleanupEndpointsinitLockNames() {} func init() { cleanupEndpointsinitLockNames() - cleanupEndpointsprefixIndex = locking.NewMutexClass(reflect.TypeOf(cleanupEndpointsMutex{}), cleanupEndpointslockNames) + cleanupEndpointsprefixIndex = locking.NewMutexClass(reflect.TypeFor[cleanupEndpointsMutex](), 
cleanupEndpointslockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/conn_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/conn_mutex.go index 6a9905ed..125baf7a 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/conn_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/conn_mutex.go @@ -92,5 +92,5 @@ func conninitLockNames() {} func init() { conninitLockNames() - connprefixIndex = locking.NewMutexClass(reflect.TypeOf(connRWMutex{}), connlockNames) + connprefixIndex = locking.NewMutexClass(reflect.TypeFor[connRWMutex](), connlockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/conn_track_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/conn_track_mutex.go index b416fda7..3ae46027 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/conn_track_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/conn_track_mutex.go @@ -92,5 +92,5 @@ func connTrackinitLockNames() {} func init() { connTrackinitLockNames() - connTrackprefixIndex = locking.NewMutexClass(reflect.TypeOf(connTrackRWMutex{}), connTracklockNames) + connTrackprefixIndex = locking.NewMutexClass(reflect.TypeFor[connTrackRWMutex](), connTracklockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/endpoints_by_nic_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/endpoints_by_nic_mutex.go index 60642030..7c365dfd 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/endpoints_by_nic_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/endpoints_by_nic_mutex.go @@ -92,5 +92,5 @@ func endpointsByNICinitLockNames() {} func init() { endpointsByNICinitLockNames() - endpointsByNICprefixIndex = locking.NewMutexClass(reflect.TypeOf(endpointsByNICRWMutex{}), endpointsByNIClockNames) + endpointsByNICprefixIndex = locking.NewMutexClass(reflect.TypeFor[endpointsByNICRWMutex](), endpointsByNIClockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/headertype_string.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/headertype_string.go index cd80de0c..bc320a41 
100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/headertype_string.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/headertype_string.go @@ -30,7 +30,7 @@ func _() { const _headerType_name = "virtioNetHeaderlinkHeadernetworkHeadertransportHeadernumHeaderType" -var _headerType_index = [...]uint8{0, 10, 23, 38, 51} +var _headerType_index = [...]uint8{0, 15, 25, 38, 53, 66} func (i headerType) String() string { if i < 0 || i >= headerType(len(_headerType_index)-1) { diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables.go index a28ea90c..c801ac22 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables.go @@ -33,6 +33,7 @@ const ( NATID TableID = iota MangleID FilterID + RawID NumTables ) @@ -111,6 +112,27 @@ func DefaultTables(clock tcpip.Clock, rand *rand.Rand) *IPTables { Postrouting: HookUnset, }, }, + RawID: { + Rules: []Rule{ + {Filter: EmptyFilter4(), Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, + {Filter: EmptyFilter4(), Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, + {Filter: EmptyFilter4(), Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, + }, + BuiltinChains: [NumHooks]int{ + Prerouting: 0, + Input: HookUnset, + Forward: HookUnset, + Output: 1, + Postrouting: HookUnset, + }, + Underflows: [NumHooks]int{ + Prerouting: 0, + Input: HookUnset, + Forward: HookUnset, + Output: 1, + Postrouting: HookUnset, + }, + }, }, v6Tables: [NumTables]Table{ NATID: { @@ -176,6 +198,27 @@ func DefaultTables(clock tcpip.Clock, rand *rand.Rand) *IPTables { Postrouting: HookUnset, }, }, + RawID: { + Rules: []Rule{ + {Filter: EmptyFilter6(), Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, + {Filter: EmptyFilter6(), Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, + {Filter: EmptyFilter6(), Target: &ErrorTarget{NetworkProtocol: 
header.IPv6ProtocolNumber}}, + }, + BuiltinChains: [NumHooks]int{ + Prerouting: 0, + Input: HookUnset, + Forward: HookUnset, + Output: 1, + Postrouting: HookUnset, + }, + Underflows: [NumHooks]int{ + Prerouting: 0, + Input: HookUnset, + Forward: HookUnset, + Output: 1, + Postrouting: HookUnset, + }, + }, }, connections: ConnTrack{ seed: rand.Uint32(), @@ -215,6 +258,24 @@ func EmptyNATTable() Table { } } +// EmptyRawTable returns a Table with no rules and only the Prerouting and +// Output hooks set, matching the Linux raw table's valid hooks. +func EmptyRawTable() Table { + return Table{ + Rules: []Rule{}, + BuiltinChains: [NumHooks]int{ + Input: HookUnset, + Forward: HookUnset, + Postrouting: HookUnset, + }, + Underflows: [NumHooks]int{ + Input: HookUnset, + Forward: HookUnset, + Postrouting: HookUnset, + }, + } +} + // GetTable returns a table with the given id and IP version. It panics when an // invalid id is provided. func (it *IPTables) GetTable(id TableID, ipv6 bool) Table { @@ -335,9 +396,13 @@ func (it *IPTables) shouldSkipOrPopulateTables(tables []checkTable, pkt *PacketB // This is called in the hot path even when iptables are disabled, so we ensure // that it does not allocate. Note that called functions (e.g. // getConnAndUpdate) can allocate. -// TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add. +// +checkescape func (it *IPTables) CheckPrerouting(pkt *PacketBuffer, addressEP AddressableEndpoint, inNicName string) bool { - tables := [...]checkTable{ + tables := [...]checkTable{ // escapes: on arm this causes an allocation. + { + fn: check, + tableID: RawID, + }, { fn: check, tableID: MangleID, @@ -373,9 +438,9 @@ func (it *IPTables) CheckPrerouting(pkt *PacketBuffer, addressEP AddressableEndp // This is called in the hot path even when iptables are disabled, so we ensure // that it does not allocate. Note that called functions (e.g. // getConnAndUpdate) can allocate. -// TODO(b/233951539): checkescape fails on arm sometimes. 
Fix and re-add. +// +checkescape func (it *IPTables) CheckInput(pkt *PacketBuffer, inNicName string) bool { - tables := [...]checkTable{ + tables := [...]checkTable{ // escapes: on arm this causes an allocation. { fn: checkNAT, tableID: NATID, @@ -413,9 +478,9 @@ func (it *IPTables) CheckInput(pkt *PacketBuffer, inNicName string) bool { // This is called in the hot path even when iptables are disabled, so we ensure // that it does not allocate. Note that called functions (e.g. // getConnAndUpdate) can allocate. -// TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add. +// +checkescape func (it *IPTables) CheckForward(pkt *PacketBuffer, inNicName, outNicName string) bool { - tables := [...]checkTable{ + tables := [...]checkTable{ // escapes: on arm this causes an allocation. { fn: check, tableID: FilterID, @@ -445,9 +510,13 @@ func (it *IPTables) CheckForward(pkt *PacketBuffer, inNicName, outNicName string // This is called in the hot path even when iptables are disabled, so we ensure // that it does not allocate. Note that called functions (e.g. // getConnAndUpdate) can allocate. -// TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add. +// +checkescape func (it *IPTables) CheckOutput(pkt *PacketBuffer, r *Route, outNicName string) bool { - tables := [...]checkTable{ + tables := [...]checkTable{ // escapes: on arm this causes an allocation. + { + fn: check, + tableID: RawID, + }, { fn: check, tableID: MangleID, @@ -489,9 +558,9 @@ func (it *IPTables) CheckOutput(pkt *PacketBuffer, r *Route, outNicName string) // This is called in the hot path even when iptables are disabled, so we ensure // that it does not allocate. Note that called functions (e.g. // getConnAndUpdate) can allocate. -// TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add. 
+// +checkescape func (it *IPTables) CheckPostrouting(pkt *PacketBuffer, r *Route, addressEP AddressableEndpoint, outNicName string) bool { - tables := [...]checkTable{ + tables := [...]checkTable{ // escapes: on arm this causes an allocation. { fn: check, tableID: MangleID, diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables_mutex.go index 9a2b97f0..928d0802 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables_mutex.go @@ -92,5 +92,5 @@ func ipTablesinitLockNames() {} func init() { ipTablesinitLockNames() - ipTablesprefixIndex = locking.NewMutexClass(reflect.TypeOf(ipTablesRWMutex{}), ipTableslockNames) + ipTablesprefixIndex = locking.NewMutexClass(reflect.TypeFor[ipTablesRWMutex](), ipTableslockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables_targets.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables_targets.go index 3ddc5d98..39b9e6ca 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables_targets.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables_targets.go @@ -413,6 +413,26 @@ func (mt *MasqueradeTarget) Action(pkt *PacketBuffer, hook Hook, r *Route, addre return snatAction(pkt, hook, r, 0 /* port */, address, true /* changePort */, true /* changeAddress */) } +// CTTarget is a no-op implementation of the CT (conntrack) target used in the +// raw table. In Linux, CT --zone sets conntrack zones for connection tracking +// isolation. gVisor's conntrack does not support zones, so this target simply +// accepts the packet, allowing iptables-restore to load rulesets that reference +// CT targets (e.g. Istio with DNS capture enabled). +// +// +stateify savable +type CTTarget struct { + // NetworkProtocol is the network protocol the target is used with. + NetworkProtocol tcpip.NetworkProtocolNumber + + // Zone is the conntrack zone ID. Stored but not acted upon. 
+ Zone uint16 +} + +// Action implements Target.Action. It is a no-op that accepts the packet. +func (*CTTarget) Action(*PacketBuffer, Hook, *Route, AddressableEndpoint) (RuleVerdict, int) { + return RuleAccept, 0 +} + func rewritePacket(n header.Network, t header.Transport, updateSRCFields, fullChecksum, updatePseudoHeader bool, newPortOrIdent uint16, newAddr tcpip.Address) { switch t := t.(type) { case header.ChecksummableTransport: diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables_types.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables_types.go index 0c7ce686..cfc28785 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables_types.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/iptables_types.go @@ -82,7 +82,7 @@ const ( type IPTables struct { connections ConnTrack - reaper tcpip.Timer + reaper tcpip.Timer `state:"nosave"` mu ipTablesRWMutex `state:"nosave"` // v4Tables and v6tables map tableIDs to tables. They hold builtin @@ -305,7 +305,7 @@ func (fl IPHeaderFilter) match(pkt *PacketBuffer, hook Hook, inNicName, outNicNa switch hook { case Prerouting, Input: return matchIfName(inNicName, fl.InputInterface, fl.InputInterfaceInvert) - case Output: + case Postrouting, Output: return matchIfName(outNicName, fl.OutputInterface, fl.OutputInterfaceInvert) case Forward: if !matchIfName(inNicName, fl.InputInterface, fl.InputInterfaceInvert) { @@ -316,8 +316,6 @@ func (fl IPHeaderFilter) match(pkt *PacketBuffer, hook Hook, inNicName, outNicNa return false } - return true - case Postrouting: return true default: panic(fmt.Sprintf("unknown hook: %d", hook)) diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/multi_port_endpoint_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/multi_port_endpoint_mutex.go index 1038997b..f5007612 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/multi_port_endpoint_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/multi_port_endpoint_mutex.go @@ -92,5 +92,5 @@ func multiPortEndpointinitLockNames() 
{} func init() { multiPortEndpointinitLockNames() - multiPortEndpointprefixIndex = locking.NewMutexClass(reflect.TypeOf(multiPortEndpointRWMutex{}), multiPortEndpointlockNames) + multiPortEndpointprefixIndex = locking.NewMutexClass(reflect.TypeFor[multiPortEndpointRWMutex](), multiPortEndpointlockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_cache_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_cache_mutex.go index 0de0fea6..5d3bcf0b 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_cache_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_cache_mutex.go @@ -92,5 +92,5 @@ func neighborCacheinitLockNames() {} func init() { neighborCacheinitLockNames() - neighborCacheprefixIndex = locking.NewMutexClass(reflect.TypeOf(neighborCacheRWMutex{}), neighborCachelockNames) + neighborCacheprefixIndex = locking.NewMutexClass(reflect.TypeFor[neighborCacheRWMutex](), neighborCachelockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_entry.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_entry.go index baa62f11..46088b98 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_entry.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_entry.go @@ -29,6 +29,8 @@ const ( ) // NeighborEntry describes a neighboring device in the local network. +// +// +stateify savable type NeighborEntry struct { Addr tcpip.Address LinkAddr tcpip.LinkAddress @@ -76,17 +78,38 @@ const ( Unreachable ) +// +stateify savable type timer struct { // done indicates to the timer that the timer was stopped. done *bool - timer tcpip.Timer + timer tcpip.Timer `state:"nosave"` +} + +// +stateify savable +type neighborEntryMu struct { + neighborEntryRWMutex `state:"nosave"` + + neigh NeighborEntry + + // done is closed when address resolution is complete. It is nil iff s is + // incomplete and resolution is not yet in progress. 
+ done chan struct{} `state:"nosave"` + + // onResolve is called with the result of address resolution. + onResolve []func(LinkResolutionResult) `state:"nosave"` + + isRouter bool + + timer timer } // neighborEntry implements a neighbor entry's individual node behavior, as per // RFC 4861 section 7.3.3. Neighbor Unreachability Detection operates in // parallel with the sending of packets to a neighbor, necessitating the // entry's lock to be acquired for all operations. +// +// +stateify savable type neighborEntry struct { neighborEntryEntry @@ -95,22 +118,7 @@ type neighborEntry struct { // nudState points to the Neighbor Unreachability Detection configuration. nudState *NUDState - mu struct { - neighborEntryRWMutex - - neigh NeighborEntry - - // done is closed when address resolution is complete. It is nil iff s is - // incomplete and resolution is not yet in progress. - done chan struct{} - - // onResolve is called with the result of address resolution. - onResolve []func(LinkResolutionResult) - - isRouter bool - - timer timer - } + mu neighborEntryMu } // newNeighborEntry creates a neighbor cache entry starting at the default @@ -571,7 +579,7 @@ func (e *neighborEntry) handleConfirmationLocked(linkAddr tcpip.LinkAddress, fla // here. 
ep := e.cache.nic.getNetworkEndpoint(header.IPv6ProtocolNumber) if ep == nil { - panic(fmt.Sprintf("have a neighbor entry for an IPv6 router but no IPv6 network endpoint")) + panic("have a neighbor entry for an IPv6 router but no IPv6 network endpoint") } if ndpEP, ok := ep.(NDPEndpoint); ok { diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_entry_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_entry_mutex.go index c6b08eb8..ff8c31de 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_entry_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/neighbor_entry_mutex.go @@ -92,5 +92,5 @@ func neighborEntryinitLockNames() {} func init() { neighborEntryinitLockNames() - neighborEntryprefixIndex = locking.NewMutexClass(reflect.TypeOf(neighborEntryRWMutex{}), neighborEntrylockNames) + neighborEntryprefixIndex = locking.NewMutexClass(reflect.TypeFor[neighborEntryRWMutex](), neighborEntrylockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nftables_types.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nftables_types.go new file mode 100644 index 00000000..113d8f13 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nftables_types.go @@ -0,0 +1,170 @@ +// Copyright 2025 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stack + +import ( + "fmt" +) + +// NFTablesInterface is an interface for evaluating chains. 
+type NFTablesInterface interface { + CheckPrerouting(pkt *PacketBuffer, af AddressFamily) bool + CheckInput(pkt *PacketBuffer, af AddressFamily) bool + CheckForward(pkt *PacketBuffer, af AddressFamily) bool + CheckOutput(pkt *PacketBuffer, af AddressFamily) bool + CheckPostrouting(pkt *PacketBuffer, af AddressFamily) bool + CheckIngress(pkt *PacketBuffer, af AddressFamily) bool + CheckEgress(pkt *PacketBuffer, af AddressFamily) bool +} + +// NFHook describes specific points in the pipeline where chains can be attached. +// Each address family has its own set of hooks (defined in supportedHooks). +// For IPv4/IPv6/Inet and Bridge, there are two possible pipelines: +// 1. Prerouting -> Input -> ~Local Process~ -> Output -> Postrouting +// 2. Prerouting -> Forward -> Postrouting +type NFHook uint16 + +const ( + // NFPrerouting Hook is supported by IPv4/IPv6/Inet, Bridge Families. + // Prerouting is evaluated before a packet is routed to applications or forwarded. + NFPrerouting NFHook = iota + + // NFInput Hook is supported by IPv4/IPv6/Inet, Bridge, ARP Families. + // Input is evaluated before a packet reaches an application. + NFInput + + // NFForward Hook is supported by IPv4/IPv6/Inet, Bridge Families. + // Forward is evaluated once it's decided that a packet should be forwarded to another host. + NFForward + + // NFOutput Hook is supported by IPv4/IPv6/Inet, Bridge, ARP Families. + // Output is evaluated after a packet is written by an application to be sent out. + NFOutput + + // NFPostrouting Hook is supported by IPv4/IPv6/Inet, Bridge Families. + // Postrouting is evaluated just before a packet goes out on the wire. + NFPostrouting + + // NFIngress Hook is supported by IPv4/IPv6/Inet, Bridge, Netdev Families. + // Ingress is the first hook evaluated, even before prerouting. + NFIngress + + // NFEgress Hook is supported by Netdev Family only. 
+ // Egress is the last hook evaluated, after the packet has been processed by the + // application and is being prepared for transmission out of the network interface. + NFEgress + + // NFNumHooks is the number of hooks supported by nftables. + NFNumHooks +) + +// hookStrings maps hooks to their string representation. +var hookStrings = map[NFHook]string{ + NFPrerouting: "Prerouting", + NFInput: "Input", + NFForward: "Forward", + NFOutput: "Output", + NFPostrouting: "Postrouting", + NFIngress: "Ingress", + NFEgress: "Egress", +} + +// String for Hook returns the name of the hook. +func (h NFHook) String() string { + if hook, ok := hookStrings[h]; ok { + return hook + } + panic(fmt.Sprintf("invalid NFHook: %d", int(h))) +} + +// AddressFamily describes the 6 address families supported by nftables. +// The address family determines the type of packets processed, and each family +// contains hooks at specific stages of the packet processing pipeline. +type AddressFamily int + +const ( + // Unspec represents an unspecified address family. + Unspec AddressFamily = iota + + // IP represents IPv4 Family. + IP + + // IP6 represents IPv6 Family. + IP6 + + // Inet represents Internet Family for hybrid IPv4/IPv6 rules. + Inet + + // Arp represents ARP Family for IPv4 ARP packets. + Arp + + // Bridge represents Bridge Family for Ethernet packets across bridge devices. + Bridge + + // Netdev represents Netdev Family for packets on ingress and egress. + Netdev + + // NumAFs is the number of address families supported by nftables. + NumAFs +) + +// AddressFamilyStrings maps address families to their string representation. +var AddressFamilyStrings = map[AddressFamily]string{ + Unspec: "UNSPEC", + IP: "IPv4", + IP6: "IPv6", + Inet: "Internet (Both IPv4/IPv6)", + Arp: "ARP", + Bridge: "Bridge", + Netdev: "Netdev", +} + +// ValidateAddressFamily ensures the family address is valid (within bounds). +// Unspecified address family is not valid. 
It is only used to reference all address families. +func ValidateAddressFamily(family AddressFamily) error { + if family < 1 || family >= NumAFs { + return fmt.Errorf("invalid address family: %d", int(family)) + } + return nil +} + +// String for AddressFamily returns the name of the address family. +func (f AddressFamily) String() string { + if af, ok := AddressFamilyStrings[f]; ok { + return af + } + panic(fmt.Sprintf("invalid address family: %d", int(f))) +} + +// +// Verdict Implementation. +// There are two types of verdicts: +// 1. Netfilter (External) Verdicts: Drop, Accept, Stolen, Queue, Repeat, Stop +// These are terminal verdicts that are returned to the kernel. +// 2. Nftable (Internal) Verdicts:, Continue, Break, Jump, Goto, Return +// These are internal verdicts that only exist within the nftables library. +// Both share the same numeric space (uint32 Verdict Code). +// + +// NFVerdict represents the result of evaluating a packet against a rule or chain. +type NFVerdict struct { + // Code is the numeric code that represents the verdict issued. + Code uint32 + + // ChainName is the name of the chain to continue evaluation if the verdict is + // Jump or Goto. + // Note: the chain must be in the same table as the current chain. + ChainName string +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nic.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nic.go index 9625f6bb..c4275623 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nic.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nic.go @@ -17,6 +17,7 @@ package stack import ( "fmt" "reflect" + "sort" "gvisor.dev/gvisor/pkg/atomicbitops" "gvisor.dev/gvisor/pkg/tcpip" @@ -71,7 +72,7 @@ type nic struct { // complete. linkResQueue packetsPendingLinkResolution - // packetEPsMu protects annotated fields below. + // packetEPsMu protects packetEPs. packetEPsMu packetEPsRWMutex `state:"nosave"` // eps is protected by the mutex, but the values contained in it are not. 
@@ -90,6 +91,10 @@ type nic struct { // Primary is the main controlling interface in a bonded setup. Primary *nic + + // experimentIPOptionEnabled indicates whether the NIC supports the + // experiment IP option. + experimentIPOptionEnabled bool } // makeNICStats initializes the NIC statistics and associates them to the global @@ -186,16 +191,13 @@ func newNIC(stack *Stack, id tcpip.NICID, ep LinkEndpoint, opts NICOptions) *nic networkEndpoints: make(map[tcpip.NetworkProtocolNumber]NetworkEndpoint), linkAddrResolvers: make(map[tcpip.NetworkProtocolNumber]*linkResolver), duplicateAddressDetectors: make(map[tcpip.NetworkProtocolNumber]DuplicateAddressDetector), + packetEPs: make(map[tcpip.NetworkProtocolNumber]*packetEndpointList), qDisc: qDisc, deliverLinkPackets: opts.DeliverLinkPackets, + experimentIPOptionEnabled: opts.EnableExperimentIPOption, } nic.linkResQueue.init(nic) - nic.packetEPsMu.Lock() - defer nic.packetEPsMu.Unlock() - - nic.packetEPs = make(map[tcpip.NetworkProtocolNumber]*packetEndpointList) - resolutionRequired := ep.Capabilities()&CapabilityResolutionRequired != 0 for _, netProto := range stack.networkProtocols { @@ -312,7 +314,7 @@ func (n *nic) enable() tcpip.Error { // resources. This guarantees no packets between this NIC and the network // stack. // -// It returns an action that has to be excuted after releasing the Stack lock +// It returns an action that has to be executed after releasing the Stack lock // and any error encountered. func (n *nic) remove(closeLinkEndpoint bool) (func(), tcpip.Error) { n.enableDisableMu.Lock() @@ -569,12 +571,22 @@ func (n *nic) allPermanentAddresses() []tcpip.ProtocolAddress { // primaryAddresses returns the primary addresses associated with this NIC. 
func (n *nic) primaryAddresses() []tcpip.ProtocolAddress { var addrs []tcpip.ProtocolAddress - for p, ep := range n.networkEndpoints { - addressableEndpoint, ok := ep.(AddressableEndpoint) + + protocolNumbers := make([]tcpip.NetworkProtocolNumber, 0, len(n.networkEndpoints)) + for p := range n.networkEndpoints { + protocolNumbers = append(protocolNumbers, p) + } + // Sort the network protocol numbers so that IPv4 address is always + // added to the list before IPv6 address. + sort.Slice(protocolNumbers, func(i, j int) bool { + return protocolNumbers[i] < protocolNumbers[j] + }) + + for _, p := range protocolNumbers { + addressableEndpoint, ok := n.networkEndpoints[p].(AddressableEndpoint) if !ok { continue } - for _, a := range addressableEndpoint.PrimaryAddresses() { addrs = append(addrs, tcpip.ProtocolAddress{Protocol: p, AddressWithPrefix: a}) } @@ -1095,6 +1107,12 @@ func (n *nic) multicastForwarding(protocol tcpip.NetworkProtocolNumber) (bool, t return ep.MulticastForwarding(), nil } +// GetExperimentIPOptionEnabled returns whether the NIC is responsible for +// passing the experiment IP option. +func (n *nic) GetExperimentIPOptionEnabled() bool { + return n.experimentIPOptionEnabled +} + // CoordinatorNIC represents NetworkLinkEndpoint that can join multiple network devices. type CoordinatorNIC interface { // AddNIC adds the specified NIC device. 
diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nic_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nic_mutex.go index e3b2332a..433f1751 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nic_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/nic_mutex.go @@ -92,5 +92,5 @@ func nicinitLockNames() {} func init() { nicinitLockNames() - nicprefixIndex = locking.NewMutexClass(reflect.TypeOf(nicRWMutex{}), niclockNames) + nicprefixIndex = locking.NewMutexClass(reflect.TypeFor[nicRWMutex](), niclockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_buffer.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_buffer.go index 24956e71..1804e53f 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_buffer.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_buffer.go @@ -381,6 +381,7 @@ func (pk *PacketBuffer) Clone() *PacketBuffer { newPk.Hash = pk.Hash newPk.Owner = pk.Owner newPk.GSOOptions = pk.GSOOptions + newPk.EgressRoute = pk.EgressRoute newPk.NetworkProtocolNumber = pk.NetworkProtocolNumber newPk.dnatDone = pk.dnatDone newPk.snatDone = pk.snatDone @@ -767,3 +768,52 @@ func BufferSince(h PacketHeader) buffer.Buffer { clone.TrimFront(int64(offset)) return clone } + +// ExperimentOptionValue returns the experiment option value from the packet +// and a bool indicating whether an experiment option value was found. 
+func (pk *PacketBuffer) ExperimentOptionValue() (uint16, bool) { + switch pk.NetworkProtocolNumber { + case header.IPv4ProtocolNumber: + h := header.IPv4(pk.NetworkHeader().Slice()) + opts := h.Options() + iter := opts.MakeIterator() + for { + opt, done, err := iter.Next() + if err != nil { + return 0, false + } + if done { + return 0, false + } + if opt.Type() == header.IPv4OptionExperimentType { + return opt.(*header.IPv4OptionExperiment).Value(), true + } + } + case header.IPv6ProtocolNumber: + h := header.IPv6(pk.NetworkHeader().Slice()) + v := pk.NetworkHeader().View() + if v != nil { + v.TrimFront(header.IPv6MinimumSize) + } + buf := buffer.MakeWithView(v) + buf.Append(pk.TransportHeader().View()) + dataBuf := pk.Data().ToBuffer() + buf.Merge(&dataBuf) + it := header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(h.NextHeader()), buf) + + for { + hdr, done, err := it.Next() + if done || err != nil { + break + } + if h, ok := hdr.(header.IPv6ExperimentExtHdr); ok { + hdr.Release() + return h.Value, true + } + hdr.Release() + } + default: + panic(fmt.Sprintf("Unexpected network protocol number %d", pk.NetworkProtocolNumber)) + } + return 0, false +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_endpoint_list_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_endpoint_list_mutex.go index ad3e0b28..f606cf08 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_endpoint_list_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_endpoint_list_mutex.go @@ -92,5 +92,5 @@ func packetEndpointListinitLockNames() {} func init() { packetEndpointListinitLockNames() - packetEndpointListprefixIndex = locking.NewMutexClass(reflect.TypeOf(packetEndpointListRWMutex{}), packetEndpointListlockNames) + packetEndpointListprefixIndex = locking.NewMutexClass(reflect.TypeFor[packetEndpointListRWMutex](), packetEndpointListlockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_eps_mutex.go 
b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_eps_mutex.go index 4e9dda8b..6f62af95 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_eps_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packet_eps_mutex.go @@ -92,5 +92,5 @@ func packetEPsinitLockNames() {} func init() { packetEPsinitLockNames() - packetEPsprefixIndex = locking.NewMutexClass(reflect.TypeOf(packetEPsRWMutex{}), packetEPslockNames) + packetEPsprefixIndex = locking.NewMutexClass(reflect.TypeFor[packetEPsRWMutex](), packetEPslockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packets_pending_link_resolution_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packets_pending_link_resolution_mutex.go index ac47a79e..e2ad0048 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packets_pending_link_resolution_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/packets_pending_link_resolution_mutex.go @@ -60,5 +60,5 @@ func packetsPendingLinkResolutioninitLockNames() {} func init() { packetsPendingLinkResolutioninitLockNames() - packetsPendingLinkResolutionprefixIndex = locking.NewMutexClass(reflect.TypeOf(packetsPendingLinkResolutionMutex{}), packetsPendingLinkResolutionlockNames) + packetsPendingLinkResolutionprefixIndex = locking.NewMutexClass(reflect.TypeFor[packetsPendingLinkResolutionMutex](), packetsPendingLinkResolutionlockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/pending_packets.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/pending_packets.go index b95c3cf0..03c81d05 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/pending_packets.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/pending_packets.go @@ -33,9 +33,8 @@ type pendingPacket struct { pkt *PacketBuffer } -// +stateify savable type packetsPendingLinkResolutionMu struct { - packetsPendingLinkResolutionMutex `state:"nosave"` + packetsPendingLinkResolutionMutex // The packets to send once the resolver completes. 
// @@ -56,7 +55,7 @@ type packetsPendingLinkResolutionMu struct { // +stateify savable type packetsPendingLinkResolution struct { nic *nic - mu packetsPendingLinkResolutionMu + mu packetsPendingLinkResolutionMu `state:"nosave"` } func (f *packetsPendingLinkResolution) incrementOutgoingPacketErrors(pkt *PacketBuffer) { @@ -150,7 +149,7 @@ func (f *packetsPendingLinkResolution) enqueue(r *Route, pkt *PacketBuffer) tcpi packets, ok := f.mu.packets[ch] packets = append(packets, pendingPacket{ routeInfo: routeInfo, - pkt: pkt.IncRef(), + pkt: pkt.Clone(), }) if len(packets) > maxPendingPacketsPerResolution { diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/registration.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/registration.go index 24f0391b..2c23eee2 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/registration.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/registration.go @@ -58,6 +58,10 @@ type NetworkPacketInfo struct { // address. LocalAddressBroadcast bool + // LocalAddressTemporary is true if the packet's local address is a temporary + // address. + LocalAddressTemporary bool + // IsForwardedPacket is true if the packet is being forwarded. IsForwardedPacket bool } @@ -162,7 +166,7 @@ type PacketEndpoint interface { // match the endpoint. // // Implementers should treat packet as immutable and should copy it - // before before modification. + // before modification. // // linkHeader may have a length of 0, in which case the PacketEndpoint // should construct its own ethernet header for applications. @@ -171,6 +175,71 @@ type PacketEndpoint interface { HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer) } +// MappablePacketEndpoint is a packet endpoint that supports forwarding its +// packets to a PacketMMapEndpoint. +type MappablePacketEndpoint interface { + PacketEndpoint + + // GetPacketMMapOpts returns the options for initializing a PacketMMapEndpoint + // for this endpoint. 
+ GetPacketMMapOpts(req *tcpip.TpacketReq, isRx bool) PacketMMapOpts + + // SetPacketMMapEndpoint sets the PacketMMapEndpoint for this endpoint. All + // packets received by this endpoint will be forwarded to the provided + // PacketMMapEndpoint. + SetPacketMMapEndpoint(ep PacketMMapEndpoint) + + // GetPacketMMapEndpoint returns the PacketMMapEndpoint for this endpoint or + // nil if there is none. + GetPacketMMapEndpoint() PacketMMapEndpoint + + // HandlePacketMMapCopy is a function that is called when a packet received is + // too large for the buffer size specified for the memory mapped endpoint. In + // this case, the packet is copied and passed to the original packet endpoint. + HandlePacketMMapCopy(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer) +} + +// PacketMMapOpts are the options for initializing a PacketMMapEndpoint. +// +// +stateify savable +type PacketMMapOpts struct { + Req *tcpip.TpacketReq + IsRx bool + Cooked bool + Stack *Stack + Wq *waiter.Queue + PacketEndpoint MappablePacketEndpoint + Version int + Reserve uint32 +} + +// PacketMMapEndpoint is the interface implemented by endpoints to handle memory +// mapped packets over the packet transport protocol (PACKET_MMAP). +type PacketMMapEndpoint interface { + // HandlePacket is called by the stack when new packets arrive that + // match the endpoint. It returns true if the packet was handled by the + // endpoint and false otherwise. + // + // Implementers should treat packet as immutable and should copy it + // before modification. + // + // linkHeader may have a length of 0, in which case the PacketEndpoint + // should construct its own ethernet header for applications. + // + // HandlePacket may modify pkt. + HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer) bool + + // Close releases any resources associated with the endpoint. + Close() + + // Readiness returns the events that the endpoint is ready for. 
+ Readiness(mask waiter.EventMask) waiter.EventMask + + // Stats returns the statistics for the endpoint that can be used for + // getsockopt(PACKET_STATISTICS). + Stats() tcpip.TpacketStats +} + // UnknownDestinationPacketDisposition enumerates the possible return values from // HandleUnknownDestinationPacket(). type UnknownDestinationPacketDisposition int @@ -244,6 +313,9 @@ type TransportProtocol interface { // previously paused by Pause. Resume() + // Restore starts any protocol level background workers during restore. + Restore() + // Parse sets pkt.TransportHeader and trims pkt.Data appropriately. It does // neither and returns false if pkt.Data is too small, i.e. pkt.Data.Size() < // MinimumPacketSize() @@ -319,6 +391,10 @@ type NetworkHeaderParams struct { // DF indicates whether the DF bit should be set. DF bool + + // ExperimentOptionValue is a 16 bit value that is set for the IP experiment + // option headers if it is not zero. + ExperimentOptionValue uint16 } // GroupAddressableEndpoint is an endpoint that supports group addressing. @@ -796,6 +872,9 @@ type NetworkEndpoint interface { // minus the network endpoint max header length. MTU() uint32 + // EndpointHeaderSize returns the size of this endpoint header. + EndpointHeaderSize() uint32 + // MaxHeaderLength returns the maximum size the network (and lower // level layers combined) headers can have. Higher levels use this // information to reserve space in the front of the packets they're @@ -1059,7 +1138,6 @@ const ( CapabilityRXChecksumOffload CapabilityResolutionRequired CapabilitySaveRestore - CapabilityDisconnectOk CapabilityLoopback ) @@ -1142,7 +1220,7 @@ type NetworkLinkEndpoint interface { // Close is called when the endpoint is removed from a stack. Close() - // SetOnCloseAction sets the action that will be exected before closing the + // SetOnCloseAction sets the action that will be executed before closing the // endpoint. 
It is used to destroy a network device when its endpoint // is closed. Endpoints that are closed only after destroying their // network devices can implement this method as no-op. diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/route.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/route.go index e571e8a1..1f9bff3d 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/route.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/route.go @@ -180,7 +180,7 @@ func constructAndValidateRoute(netProto tcpip.NetworkProtocolNumber, addressEndp // AssignableAddressEndpoint. func makeRoute(netProto tcpip.NetworkProtocolNumber, gateway, localAddr, remoteAddr tcpip.Address, outgoingNIC, localAddressNIC *nic, localAddressEndpoint AssignableAddressEndpoint, handleLocal, multicastLoop bool, mtu uint32) *Route { if localAddressNIC.stack != outgoingNIC.stack { - panic(fmt.Sprintf("cannot create a route with NICs from different stacks")) + panic("cannot create a route with NICs from different stacks") } if localAddr.BitLen() == 0 { @@ -245,6 +245,14 @@ func makeRoute(netProto tcpip.NetworkProtocolNumber, gateway, localAddr, remoteA } func makeRouteInner(netProto tcpip.NetworkProtocolNumber, localAddr, remoteAddr tcpip.Address, outgoingNIC, localAddressNIC *nic, localAddressEndpoint AssignableAddressEndpoint, loop PacketLooping, mtu uint32) *Route { + if mtu != 0 { + adjusted := mtu - outgoingNIC.getNetworkEndpoint(netProto).EndpointHeaderSize() + if adjusted > mtu { + mtu = 0 + } else { + mtu = adjusted + } + } r := &Route{ routeInfo: routeInfo{ NetProto: netProto, @@ -339,11 +347,6 @@ func (r *Route) HasSaveRestoreCapability() bool { return r.outgoingNIC.NetworkLinkEndpoint.Capabilities()&CapabilitySaveRestore != 0 } -// HasDisconnectOkCapability returns true if the route supports disconnecting. 
-func (r *Route) HasDisconnectOkCapability() bool { - return r.outgoingNIC.NetworkLinkEndpoint.Capabilities()&CapabilityDisconnectOk != 0 -} - // GSOMaxSize returns the maximum GSO packet size. func (r *Route) GSOMaxSize() uint32 { if gso, ok := r.outgoingNIC.NetworkLinkEndpoint.(GSOEndpoint); ok { @@ -525,6 +528,7 @@ func (r *Route) DefaultTTL() uint8 { // MTU returns the MTU of the route if present, otherwise the MTU of the underlying network endpoint. func (r *Route) MTU() uint32 { if r.mtu > 0 { + // r.mtu is already adjusted to account for IP headers. See makeRouteInner. return r.mtu } return r.outgoingNIC.getNetworkEndpoint(r.NetProto()).MTU() diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/route_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/route_mutex.go index 28a5e869..0f95350d 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/route_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/route_mutex.go @@ -92,5 +92,5 @@ func routeinitLockNames() {} func init() { routeinitLockNames() - routeprefixIndex = locking.NewMutexClass(reflect.TypeOf(routeRWMutex{}), routelockNames) + routeprefixIndex = locking.NewMutexClass(reflect.TypeFor[routeRWMutex](), routelockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/route_stack_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/route_stack_mutex.go index ec3796c3..0a969f75 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/route_stack_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/route_stack_mutex.go @@ -92,5 +92,5 @@ func routeStackinitLockNames() {} func init() { routeStackinitLockNames() - routeStackprefixIndex = locking.NewMutexClass(reflect.TypeOf(routeStackRWMutex{}), routeStacklockNames) + routeStackprefixIndex = locking.NewMutexClass(reflect.TypeFor[routeStackRWMutex](), routeStacklockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/save_restore.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/save_restore.go new file mode 100644 index 00000000..48ed8c78 
--- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/save_restore.go @@ -0,0 +1,54 @@ +// Copyright 2024 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stack + +import ( + "context" + "math/rand" + "time" + + cryptorand "gvisor.dev/gvisor/pkg/rand" +) + +// beforeSave is invoked by stateify. +func (s *Stack) beforeSave() { + // removeConf will be set only in case of save/restore. + s.mu.Lock() + if !s.removeConf { + s.mu.Unlock() + return + } + + // Remove all the NICs and routes from the stack as they will be + // created again during restore based on the new network config. + deferActs := make([]func(), 0) + for id := range s.nics { + act, _ := s.removeNICLocked(id, true /* closeLinkEndpoint */) + if act != nil { + deferActs = append(deferActs, act) + } + } + s.mu.Unlock() + + for _, act := range deferActs { + act() + } +} + +// afterLoad is invoked by stateify. 
+func (s *Stack) afterLoad(context.Context) { + s.insecureRNG = rand.New(rand.NewSource(time.Now().UnixNano())) + s.secureRNG = cryptorand.RNGFrom(cryptorand.Reader) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack.go index 7dc7cd35..6f6dbcc8 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack.go @@ -20,11 +20,11 @@ package stack import ( + "context" "encoding/binary" "fmt" "io" "math/rand" - "sync/atomic" "time" "golang.org/x/time/rate" @@ -90,16 +90,18 @@ type Stack struct { // routeTable is a list of routes sorted by prefix length, longest (most specific) first. // +checklocks:routeMu - routeTable tcpip.RouteList + routeTable tcpip.RouteList `state:"nosave"` mu stackRWMutex `state:"nosave"` // +checklocks:mu - nics map[tcpip.NICID]*nic + nics map[tcpip.NICID]*nic `state:"nosave"` + // +checklocks:mu + loopbackNIC *nic `state:"nosave"` // +checklocks:mu defaultForwardingEnabled map[tcpip.NetworkProtocolNumber]struct{} // nicIDGen is used to generate NIC IDs. - nicIDGen atomicbitops.Int32 + nicIDGen atomicbitops.Int32 `state:"nosave"` // cleanupEndpointsMu protects cleanupEndpoints. cleanupEndpointsMu cleanupEndpointsMutex `state:"nosave"` @@ -108,11 +110,6 @@ type Stack struct { *ports.PortManager - // If not nil, then any new endpoints will have this probe function - // invoked everytime they receive a TCP segment. - // TODO(b/341946753): Restore them when netstack is savable. - tcpProbeFunc atomic.Value `state:"nosave"` // TCPProbeFunc - // clock is used to generate user-visible times. clock tcpip.Clock @@ -123,6 +120,13 @@ type Stack struct { // TODO(gvisor.dev/issue/4595): S/R this field. tables *IPTables `state:"nosave"` + // nftables is the nftables interface for packet filtering and manipulation rules. 
+ nftables NFTablesInterface `state:"nosave"` + + // nftablesConfigured indicates whether NFTables is configured with at + // least one rule on a chain at a network hook. + nftablesConfigured atomicbitops.Bool + // restoredEndpoints is a list of endpoints that need to be restored if the // stack is being restored. restoredEndpoints []RestoredEndpoint @@ -150,11 +154,9 @@ type Stack struct { // randomGenerator is an injectable pseudo random generator that can be // used when a random number is required. It must not be used in // security-sensitive contexts. - // TODO(b/341946753): Restore them when netstack is savable. insecureRNG *rand.Rand `state:"nosave"` // secureRNG is a cryptographically secure random number generator. - // TODO(b/341946753): Restore them when netstack is savable. secureRNG cryptorand.RNG `state:"nosave"` // sendBufferSize holds the min/default/max send buffer sizes for @@ -180,6 +182,18 @@ type Stack struct { // tsOffsetSecret is the secret key for generating timestamp offsets // initialized at stack startup. tsOffsetSecret uint32 + + // saveRestoreEnabled indicates whether the stack is saved and restored. + saveRestoreEnabled bool + + // removeConf indicates whether to remove NICs and routes and terminate + // active connections before saving. This flag will be set to true only + // when resume is false. + removeConf bool `state:"nosave"` + + // externalNetworkingDisabled indicates whether external networking is + // disabled. This means all non-loopback NICs are disabled. + externalNetworkingDisabled bool } // NetworkProtocolFactory instantiates a network protocol. @@ -242,6 +256,9 @@ type Options struct { // all traffic. IPTables *IPTables + // NFTables is the nftables interface for packet filtering and manipulation rules. + NFTables NFTablesInterface + // DefaultIPTables is an optional iptables rules constructor that is called // if IPTables is nil. If both fields are nil, iptables will allow all // traffic. 
@@ -394,6 +411,7 @@ func New(opts Options) *Stack { stats: opts.Stats.FillIn(), handleLocal: opts.HandleLocal, tables: opts.IPTables, + nftables: opts.NFTables, icmpRateLimiter: NewICMPRateLimiter(clock), seed: secureRNG.Uint32(), nudConfigs: opts.NUDConfigs, @@ -779,26 +797,30 @@ func (s *Stack) addRouteLocked(route *tcpip.Route) { s.routeTable.PushBack(route) } -// RemoveRoutes removes matching routes from the route table. -func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) { +// RemoveRoutes removes matching routes from the route table, it +// returns the number of routes that are removed. +func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) int { s.routeMu.Lock() defer s.routeMu.Unlock() - s.removeRoutesLocked(match) + return s.removeRoutesLocked(match) } // +checklocks:s.routeMu -func (s *Stack) removeRoutesLocked(match func(tcpip.Route) bool) { +func (s *Stack) removeRoutesLocked(match func(tcpip.Route) bool) int { + count := 0 for route := s.routeTable.Front(); route != nil; { next := route.Next() if match(*route) { s.routeTable.Remove(route) + count++ } route = next } + return count } -// ReplaceRoute replaces the route in the routing table which matchse +// ReplaceRoute replaces the route in the routing table which matches // the lookup key for the routing table. If there is no match, the given // route will still be added to the routing table. // The lookup key consists of destination, ToS, scope and output interface. @@ -878,12 +900,16 @@ type NICOptions struct { // DeliverLinkPackets specifies whether the NIC is responsible for // delivering raw packets to packet sockets. DeliverLinkPackets bool + + // EnableExperimentIPOption specifies whether the NIC is responsible for + // passing the experiment IP option. + EnableExperimentIPOption bool } // GetNICByID return a network device associated with the specified ID. 
func (s *Stack) GetNICByID(id tcpip.NICID) (*nic, tcpip.Error) { - s.mu.Lock() - defer s.mu.Unlock() + s.mu.RLock() + defer s.mu.RUnlock() n, ok := s.nics[id] if !ok { @@ -925,6 +951,9 @@ func (s *Stack) CreateNICWithOptions(id tcpip.NICID, ep LinkEndpoint, opts NICOp } } s.nics[id] = n + if n.IsLoopback() { + s.loopbackNIC = n + } ep.SetOnCloseAction(func() { s.RemoveNIC(id) }) @@ -1001,7 +1030,7 @@ func (s *Stack) CheckNIC(id tcpip.NICID) bool { // RemoveNIC removes NIC and all related routes from the network stack. func (s *Stack) RemoveNIC(id tcpip.NICID) tcpip.Error { s.mu.Lock() - deferAct, err := s.removeNICLocked(id) + deferAct, err := s.removeNICLocked(id, true /* closeLinkEndpoint */) s.mu.Unlock() if deferAct != nil { deferAct() @@ -1012,7 +1041,7 @@ func (s *Stack) RemoveNIC(id tcpip.NICID) tcpip.Error { // removeNICLocked removes NIC and all related routes from the network stack. // // +checklocks:s.mu -func (s *Stack) removeNICLocked(id tcpip.NICID) (func(), tcpip.Error) { +func (s *Stack) removeNICLocked(id tcpip.NICID, closeLinkEndpoint bool) (func(), tcpip.Error) { nic, ok := s.nics[id] if !ok { return nil, &tcpip.ErrUnknownNICID{} @@ -1037,7 +1066,22 @@ func (s *Stack) removeNICLocked(id tcpip.NICID) (func(), tcpip.Error) { } s.routeMu.Unlock() - return nic.remove(true /* closeLinkEndpoint */) + if s.loopbackNIC == nic { + s.loopbackNIC = nil + } + return nic.remove(closeLinkEndpoint) +} + +// GetNICCoordinatorID returns the ID of the coordinator device of a NIC. +func (s *Stack) GetNICCoordinatorID(id tcpip.NICID) (tcpip.NICID, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + if nic, ok := s.nics[id]; ok { + if nic.Primary != nil { + return nic.Primary.id, true + } + } + return 0, false } // SetNICCoordinator sets a coordinator device. @@ -1049,7 +1093,10 @@ func (s *Stack) SetNICCoordinator(id tcpip.NICID, mid tcpip.NICID) tcpip.Error { if !ok { return &tcpip.ErrUnknownNICID{} } - + // Setting a coordinator for a coordinator NIC is not allowed. 
+ if _, ok := nic.NetworkLinkEndpoint.(CoordinatorNIC); ok { + return &tcpip.ErrNoSuchFile{} + } m, ok := s.nics[mid] if !ok { return &tcpip.ErrUnknownNICID{} @@ -1137,6 +1184,9 @@ type NICInfo struct { // MulticastForwarding holds the forwarding status for each network endpoint // that supports multicast forwarding. MulticastForwarding map[tcpip.NetworkProtocolNumber]bool + + // Primary is the index of the main controlling interface in a bonded setup. + Primary tcpip.NICID } // HasNIC returns true if the NICID is defined in the stack. @@ -1147,65 +1197,87 @@ func (s *Stack) HasNIC(id tcpip.NICID) bool { return ok } +type forwardingFn func(tcpip.NetworkProtocolNumber) (bool, tcpip.Error) + +func forwardingValue(forwardingFn forwardingFn, proto tcpip.NetworkProtocolNumber, nicID tcpip.NICID, fnName string) (forward bool, ok bool) { + switch forwarding, err := forwardingFn(proto); err.(type) { + case nil: + return forwarding, true + case *tcpip.ErrUnknownProtocol: + panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nicID)) + case *tcpip.ErrNotSupported: + // Not all network protocols support forwarding. + default: + panic(fmt.Sprintf("nic(id=%d).%s(%d): %s", nicID, fnName, proto, err)) + } + return false, false +} + +// precondition: s.mu is held. +func (s *Stack) nicInfo(nic *nic, id tcpip.NICID) *NICInfo { + flags := NICStateFlags{ + Up: true, // Netstack interfaces are always up. 
+ Running: nic.Enabled(), + Promiscuous: nic.Promiscuous(), + Loopback: nic.IsLoopback(), + } + + netStats := make(map[tcpip.NetworkProtocolNumber]NetworkEndpointStats) + for proto, netEP := range nic.networkEndpoints { + netStats[proto] = netEP.Stats() + } + + info := NICInfo{ + Name: nic.name, + LinkAddress: nic.NetworkLinkEndpoint.LinkAddress(), + ProtocolAddresses: nic.primaryAddresses(), + Flags: flags, + MTU: nic.NetworkLinkEndpoint.MTU(), + Stats: nic.stats.local, + NetworkStats: netStats, + Context: nic.context, + ARPHardwareType: nic.NetworkLinkEndpoint.ARPHardwareType(), + Forwarding: make(map[tcpip.NetworkProtocolNumber]bool), + MulticastForwarding: make(map[tcpip.NetworkProtocolNumber]bool), + } + + for proto := range s.networkProtocols { + if forwarding, ok := forwardingValue(nic.forwarding, proto, id, "forwarding"); ok { + info.Forwarding[proto] = forwarding + } + + if multicastForwarding, ok := forwardingValue(nic.multicastForwarding, proto, id, "multicastForwarding"); ok { + info.MulticastForwarding[proto] = multicastForwarding + } + } + + if nic.Primary != nil { + info.Primary = nic.Primary.id + } + + return &info +} + +// SingleNICInfo returns the NICInfo for the given NICID. +func (s *Stack) SingleNICInfo(id tcpip.NICID) (*NICInfo, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + + if nic, ok := s.nics[id]; !ok { + return nil, false + } else { + return s.nicInfo(nic, id), true + } +} + // NICInfo returns a map of NICIDs to their associated information. 
func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo { s.mu.RLock() defer s.mu.RUnlock() - type forwardingFn func(tcpip.NetworkProtocolNumber) (bool, tcpip.Error) - forwardingValue := func(forwardingFn forwardingFn, proto tcpip.NetworkProtocolNumber, nicID tcpip.NICID, fnName string) (forward bool, ok bool) { - switch forwarding, err := forwardingFn(proto); err.(type) { - case nil: - return forwarding, true - case *tcpip.ErrUnknownProtocol: - panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nicID)) - case *tcpip.ErrNotSupported: - // Not all network protocols support forwarding. - default: - panic(fmt.Sprintf("nic(id=%d).%s(%d): %s", nicID, fnName, proto, err)) - } - return false, false - } - nics := make(map[tcpip.NICID]NICInfo) for id, nic := range s.nics { - flags := NICStateFlags{ - Up: true, // Netstack interfaces are always up. - Running: nic.Enabled(), - Promiscuous: nic.Promiscuous(), - Loopback: nic.IsLoopback(), - } - - netStats := make(map[tcpip.NetworkProtocolNumber]NetworkEndpointStats) - for proto, netEP := range nic.networkEndpoints { - netStats[proto] = netEP.Stats() - } - - info := NICInfo{ - Name: nic.name, - LinkAddress: nic.NetworkLinkEndpoint.LinkAddress(), - ProtocolAddresses: nic.primaryAddresses(), - Flags: flags, - MTU: nic.NetworkLinkEndpoint.MTU(), - Stats: nic.stats.local, - NetworkStats: netStats, - Context: nic.context, - ARPHardwareType: nic.NetworkLinkEndpoint.ARPHardwareType(), - Forwarding: make(map[tcpip.NetworkProtocolNumber]bool), - MulticastForwarding: make(map[tcpip.NetworkProtocolNumber]bool), - } - - for proto := range s.networkProtocols { - if forwarding, ok := forwardingValue(nic.forwarding, proto, id, "forwarding"); ok { - info.Forwarding[proto] = forwarding - } - - if multicastForwarding, ok := forwardingValue(nic.multicastForwarding, proto, id, "multicastForwarding"); ok { - info.MulticastForwarding[proto] = multicastForwarding - } - } - - nics[id] = info + nics[id] = *s.nicInfo(nic, id) } 
return nics } @@ -1371,6 +1443,29 @@ func (s *Stack) findLocalRouteFromNICRLocked(localAddressNIC *nic, localAddr, re return r } +func (s *Stack) loopbackLocalRoute(localAddressNIC *nic, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { + localAddressEndpoint := localAddressNIC.getAddressOrCreateTempInner(netProto, localAddr, true /* createTemp */, NeverPrimaryEndpoint) + if localAddressEndpoint == nil { + return nil + } + + r := makeLocalRoute( + netProto, + localAddr, + remoteAddr, + localAddressNIC, + localAddressNIC, + localAddressEndpoint, + ) + + if r.IsOutboundBroadcast() { + r.Release() + return nil + } + + return r +} + // findLocalRouteRLocked returns a local route. // // A local route is a route to some remote address which the stack owns. That @@ -1383,6 +1478,22 @@ func (s *Stack) findLocalRouteRLocked(localAddressNICID tcpip.NICID, localAddr, } if localAddressNICID == 0 { + if s.loopbackNIC != nil { + // Send all packets directed to local ip addresses through the loopback device. + for _, nic := range s.nics { + if !nic.hasAddress(netProto, remoteAddr) { + continue + } + if isSubnetBroadcastOnNIC(nic, netProto, remoteAddr) { + break + } + if r := s.loopbackLocalRoute(s.loopbackNIC, localAddr, remoteAddr, netProto); r != nil { + return r + } + break + } + } + for _, localAddressNIC := range s.nics { if r := s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto); r != nil { return r @@ -1584,8 +1695,7 @@ func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, n } } - // TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable. - return nil, &tcpip.ErrHostUnreachable{} + return nil, &tcpip.ErrNetworkUnreachable{} } if id == 0 { @@ -1598,13 +1708,11 @@ func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, n } if needRoute { - // TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable. 
- return nil, &tcpip.ErrHostUnreachable{} + return nil, &tcpip.ErrNetworkUnreachable{} } if header.IsV6LoopbackAddress(remoteAddr) { return nil, &tcpip.ErrBadLocalAddress{} } - // TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable. return nil, &tcpip.ErrNetworkUnreachable{} } @@ -1933,7 +2041,7 @@ func (s *Stack) Wait() { for id, n := range s.nics { // Remove NIC to ensure that qDisc goroutines are correctly // terminated on stack teardown. - act, _ := s.removeNICLocked(id) + act, _ := s.removeNICLocked(id, true /* closeLinkEndpoint */) n.NetworkLinkEndpoint.Wait() if act != nil { deferActs = append(deferActs, act) @@ -1959,6 +2067,47 @@ func (s *Stack) Pause() { } } +func (s *Stack) getNICs() map[tcpip.NICID]*nic { + s.mu.RLock() + defer s.mu.RUnlock() + + nics := s.nics + return nics +} + +// ReplaceConfig replaces config in the loaded stack. +func (s *Stack) ReplaceConfig(st *Stack) { + if st == nil { + panic("stack.Stack cannot be nil when netstack s/r is enabled") + } + + // Update route table. + s.SetRouteTable(st.GetRouteTable()) + + nics := st.getNICs() + + s.mu.Lock() + defer s.mu.Unlock() + + // Update iptables and nftables. + s.tables = st.IPTables() + s.nftables = st.NFTables() + + // Update NICs. + s.nics = make(map[tcpip.NICID]*nic) + s.loopbackNIC = nil + for id, nic := range nics { + nic.stack = s + s.nics[id] = nic + if nic.IsLoopback() { + s.loopbackNIC = nic + } else if s.externalNetworkingDisabled { + nic.disable() + } + _ = s.NextNICID() + } +} + // Restore restarts the stack after a restore. This must be called after the // entire system has been restored. func (s *Stack) Restore() { @@ -1967,13 +2116,23 @@ func (s *Stack) Restore() { s.mu.Lock() eps := s.restoredEndpoints s.restoredEndpoints = nil + saveRestoreEnabled := s.saveRestoreEnabled s.mu.Unlock() for _, e := range eps { e.Restore(s) } + + // Make sure all the endpoints are loaded correctly before resuming the + // protocol level background workers. 
+ tcpip.AsyncLoading.Wait() + // Now resume any protocol level background workers. for _, p := range s.transportProtocols { - p.proto.Resume() + if saveRestoreEnabled { + p.proto.Restore() + } else { + p.proto.Resume() + } } } @@ -2102,41 +2261,6 @@ func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) Tra return nil } -// AddTCPProbe installs a probe function that will be invoked on every segment -// received by a given TCP endpoint. The probe function is passed a copy of the -// TCP endpoint state before and after processing of the segment. -// -// NOTE: TCPProbe is added only to endpoints created after this call. Endpoints -// created prior to this call will not call the probe function. -// -// Further, installing two different probes back to back can result in some -// endpoints calling the first one and some the second one. There is no -// guarantee provided on which probe will be invoked. Ideally this should only -// be called once per stack. -func (s *Stack) AddTCPProbe(probe TCPProbeFunc) { - s.tcpProbeFunc.Store(probe) -} - -// GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil -// otherwise. -func (s *Stack) GetTCPProbe() TCPProbeFunc { - p := s.tcpProbeFunc.Load() - if p == nil { - return nil - } - return p.(TCPProbeFunc) -} - -// RemoveTCPProbe removes an installed TCP probe. -// -// NOTE: This only ensures that endpoints created after this call do not -// have a probe attached. Endpoints already created will continue to invoke -// TCP probe. -func (s *Stack) RemoveTCPProbe() { - // This must be TCPProbeFunc(nil) because atomic.Value.Store(nil) panics. - s.tcpProbeFunc.Store(TCPProbeFunc(nil)) -} - // JoinGroup joins the given multicast group on the given NIC. 
func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { s.mu.RLock() @@ -2176,6 +2300,26 @@ func (s *Stack) IPTables() *IPTables { return s.tables } +// NFTables returns the stack's nftables. +func (s *Stack) NFTables() NFTablesInterface { + return s.nftables +} + +// SetNFTables sets the stack's nftables. +func (s *Stack) SetNFTables(nft NFTablesInterface) { + s.nftables = nft +} + +// IsNFTablesConfigured returns true if the stack has nftables configured. +func (s *Stack) IsNFTablesConfigured() bool { + return s.nftablesConfigured.Load() +} + +// SetNFTablesConfigured sets whether the stack has nftables configured. +func (s *Stack) SetNFTablesConfigured(configured bool) { + s.nftablesConfigured.Store(configured) +} + // ICMPLimit returns the maximum number of ICMP messages that can be sent // in one second. func (s *Stack) ICMPLimit() rate.Limit { @@ -2382,12 +2526,11 @@ func (s *Stack) SetNICStack(id tcpip.NICID, peer *Stack) (tcpip.NICID, tcpip.Err s.mu.Unlock() return id, nil } - delete(s.nics, id) - // Remove routes in-place. n tracks the number of routes written. - s.RemoveRoutes(func(r tcpip.Route) bool { return r.NIC == id }) - ne := nic.NetworkLinkEndpoint.(LinkEndpoint) - deferAct, err := nic.remove(false /* closeLinkEndpoint */) + linkEp := nic.NetworkLinkEndpoint.(LinkEndpoint) + name := nic.Name() + + deferAct, err := s.removeNICLocked(id, false /* closeLinkEndpoint */) s.mu.Unlock() if deferAct != nil { deferAct() @@ -2397,5 +2540,75 @@ func (s *Stack) SetNICStack(id tcpip.NICID, peer *Stack) (tcpip.NICID, tcpip.Err } id = tcpip.NICID(peer.NextNICID()) - return id, peer.CreateNICWithOptions(id, ne, NICOptions{Name: nic.Name()}) + return id, peer.CreateNICWithOptions(id, linkEp, NICOptions{Name: name}) +} + +// EnableSaveRestore marks the saveRestoreEnabled to true. 
+func (s *Stack) EnableSaveRestore() { + s.mu.Lock() + defer s.mu.Unlock() + + s.saveRestoreEnabled = true +} + +// IsSaveRestoreEnabled returns true if save restore is enabled for the stack. +func (s *Stack) IsSaveRestoreEnabled() bool { + s.mu.Lock() + defer s.mu.Unlock() + + return s.saveRestoreEnabled +} + +// contextID is this package's type for context.Context.Value keys. +type contextID int + +const ( + // CtxRestoreStack is a Context.Value key for the stack to be used in restore. + CtxRestoreStack contextID = iota +) + +// RestoreStackFromContext returns the stack to be used during restore. +func RestoreStackFromContext(ctx context.Context) *Stack { + if st := ctx.Value(CtxRestoreStack); st != nil { + return st.(*Stack) + } + return nil +} + +// SetRemoveConf sets the removeConf in stack to the given value. +func (s *Stack) SetRemoveConf(removeConf bool) { + s.mu.Lock() + defer s.mu.Unlock() + s.removeConf = removeConf +} + +// GetRemoveConf gets the removeConf from stack. +func (s *Stack) GetRemoveConf() bool { + s.mu.RLock() + defer s.mu.RUnlock() + return s.removeConf +} + +// DisableAllNonLoopbackNICs disables all non-loopback NICs in the stack. +func (s *Stack) DisableAllNonLoopbackNICs() { + s.mu.Lock() + defer s.mu.Unlock() + s.externalNetworkingDisabled = true + for _, nic := range s.nics { + if !nic.IsLoopback() { + nic.disable() + } + } +} + +// EnableAllNonLoopbackNICs enables all non-loopback NICs in the stack. 
+func (s *Stack) EnableAllNonLoopbackNICs() { + s.mu.Lock() + defer s.mu.Unlock() + s.externalNetworkingDisabled = false + for _, nic := range s.nics { + if !nic.IsLoopback() { + nic.enable() + } + } } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack_mutex.go index ef672873..40f3f305 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack_mutex.go @@ -92,5 +92,5 @@ func stackinitLockNames() {} func init() { stackinitLockNames() - stackprefixIndex = locking.NewMutexClass(reflect.TypeOf(stackRWMutex{}), stacklockNames) + stackprefixIndex = locking.NewMutexClass(reflect.TypeFor[stackRWMutex](), stacklockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack_state_autogen.go index b3f89110..9f19ba81 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/stack_state_autogen.go @@ -85,55 +85,6 @@ func (a *AddressableEndpointStateOptions) StateLoad(ctx context.Context, stateSo stateSourceObject.Load(0, &a.HiddenWhileDisabled) } -func (a *addressState) StateTypeName() string { - return "pkg/tcpip/stack.addressState" -} - -func (a *addressState) StateFields() []string { - return []string{ - "addressableEndpointState", - "addr", - "subnet", - "temporary", - "refs", - "kind", - "configType", - "lifetimes", - "disp", - } -} - -func (a *addressState) beforeSave() {} - -// +checklocksignore -func (a *addressState) StateSave(stateSinkObject state.Sink) { - a.beforeSave() - stateSinkObject.Save(0, &a.addressableEndpointState) - stateSinkObject.Save(1, &a.addr) - stateSinkObject.Save(2, &a.subnet) - stateSinkObject.Save(3, &a.temporary) - stateSinkObject.Save(4, &a.refs) - stateSinkObject.Save(5, &a.kind) - stateSinkObject.Save(6, &a.configType) - stateSinkObject.Save(7, &a.lifetimes) - 
stateSinkObject.Save(8, &a.disp) -} - -func (a *addressState) afterLoad(context.Context) {} - -// +checklocksignore -func (a *addressState) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &a.addressableEndpointState) - stateSourceObject.Load(1, &a.addr) - stateSourceObject.Load(2, &a.subnet) - stateSourceObject.Load(3, &a.temporary) - stateSourceObject.Load(4, &a.refs) - stateSourceObject.Load(5, &a.kind) - stateSourceObject.Load(6, &a.configType) - stateSourceObject.Load(7, &a.lifetimes) - stateSourceObject.Load(8, &a.disp) -} - func (p *bridgePort) StateTypeName() string { return "pkg/tcpip/stack.bridgePort" } @@ -173,6 +124,7 @@ func (b *BridgeEndpoint) StateFields() []string { "addr", "attached", "mtu", + "fdbTable", "maxHeaderLength", } } @@ -187,7 +139,8 @@ func (b *BridgeEndpoint) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(2, &b.addr) stateSinkObject.Save(3, &b.attached) stateSinkObject.Save(4, &b.mtu) - stateSinkObject.Save(5, &b.maxHeaderLength) + stateSinkObject.Save(5, &b.fdbTable) + stateSinkObject.Save(6, &b.maxHeaderLength) } func (b *BridgeEndpoint) afterLoad(context.Context) {} @@ -199,7 +152,8 @@ func (b *BridgeEndpoint) StateLoad(ctx context.Context, stateSourceObject state. 
stateSourceObject.Load(2, &b.addr) stateSourceObject.Load(3, &b.attached) stateSourceObject.Load(4, &b.mtu) - stateSourceObject.Load(5, &b.maxHeaderLength) + stateSourceObject.Load(5, &b.fdbTable) + stateSourceObject.Load(6, &b.maxHeaderLength) } func (t *tuple) StateTypeName() string { @@ -714,6 +668,34 @@ func (mt *MasqueradeTarget) StateLoad(ctx context.Context, stateSourceObject sta stateSourceObject.Load(0, &mt.NetworkProtocol) } +func (c *CTTarget) StateTypeName() string { + return "pkg/tcpip/stack.CTTarget" +} + +func (c *CTTarget) StateFields() []string { + return []string{ + "NetworkProtocol", + "Zone", + } +} + +func (c *CTTarget) beforeSave() {} + +// +checklocksignore +func (c *CTTarget) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.NetworkProtocol) + stateSinkObject.Save(1, &c.Zone) +} + +func (c *CTTarget) afterLoad(context.Context) {} + +// +checklocksignore +func (c *CTTarget) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.NetworkProtocol) + stateSourceObject.Load(1, &c.Zone) +} + func (it *IPTables) StateTypeName() string { return "pkg/tcpip/stack.IPTables" } @@ -721,7 +703,6 @@ func (it *IPTables) StateTypeName() string { func (it *IPTables) StateFields() []string { return []string{ "connections", - "reaper", "v4Tables", "v6Tables", "modified", @@ -732,19 +713,17 @@ func (it *IPTables) StateFields() []string { func (it *IPTables) StateSave(stateSinkObject state.Sink) { it.beforeSave() stateSinkObject.Save(0, &it.connections) - stateSinkObject.Save(1, &it.reaper) - stateSinkObject.Save(2, &it.v4Tables) - stateSinkObject.Save(3, &it.v6Tables) - stateSinkObject.Save(4, &it.modified) + stateSinkObject.Save(1, &it.v4Tables) + stateSinkObject.Save(2, &it.v6Tables) + stateSinkObject.Save(3, &it.modified) } // +checklocksignore func (it *IPTables) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(0, &it.connections) - 
stateSourceObject.Load(1, &it.reaper) - stateSourceObject.Load(2, &it.v4Tables) - stateSourceObject.Load(3, &it.v6Tables) - stateSourceObject.Load(4, &it.modified) + stateSourceObject.Load(1, &it.v4Tables) + stateSourceObject.Load(2, &it.v6Tables) + stateSourceObject.Load(3, &it.modified) stateSourceObject.AfterLoad(func() { it.afterLoad(ctx) }) } @@ -964,6 +943,130 @@ func (n *neighborCache) StateLoad(ctx context.Context, stateSourceObject state.S stateSourceObject.Load(3, &n.mu) } +func (n *NeighborEntry) StateTypeName() string { + return "pkg/tcpip/stack.NeighborEntry" +} + +func (n *NeighborEntry) StateFields() []string { + return []string{ + "Addr", + "LinkAddr", + "State", + "UpdatedAt", + } +} + +func (n *NeighborEntry) beforeSave() {} + +// +checklocksignore +func (n *NeighborEntry) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.Addr) + stateSinkObject.Save(1, &n.LinkAddr) + stateSinkObject.Save(2, &n.State) + stateSinkObject.Save(3, &n.UpdatedAt) +} + +func (n *NeighborEntry) afterLoad(context.Context) {} + +// +checklocksignore +func (n *NeighborEntry) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.Addr) + stateSourceObject.Load(1, &n.LinkAddr) + stateSourceObject.Load(2, &n.State) + stateSourceObject.Load(3, &n.UpdatedAt) +} + +func (t *timer) StateTypeName() string { + return "pkg/tcpip/stack.timer" +} + +func (t *timer) StateFields() []string { + return []string{ + "done", + } +} + +func (t *timer) beforeSave() {} + +// +checklocksignore +func (t *timer) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.done) +} + +func (t *timer) afterLoad(context.Context) {} + +// +checklocksignore +func (t *timer) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.done) +} + +func (n *neighborEntryMu) StateTypeName() string { + return "pkg/tcpip/stack.neighborEntryMu" +} + +func (n *neighborEntryMu) 
StateFields() []string { + return []string{ + "neigh", + "isRouter", + "timer", + } +} + +func (n *neighborEntryMu) beforeSave() {} + +// +checklocksignore +func (n *neighborEntryMu) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.neigh) + stateSinkObject.Save(1, &n.isRouter) + stateSinkObject.Save(2, &n.timer) +} + +func (n *neighborEntryMu) afterLoad(context.Context) {} + +// +checklocksignore +func (n *neighborEntryMu) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.neigh) + stateSourceObject.Load(1, &n.isRouter) + stateSourceObject.Load(2, &n.timer) +} + +func (e *neighborEntry) StateTypeName() string { + return "pkg/tcpip/stack.neighborEntry" +} + +func (e *neighborEntry) StateFields() []string { + return []string{ + "neighborEntryEntry", + "cache", + "nudState", + "mu", + } +} + +func (e *neighborEntry) beforeSave() {} + +// +checklocksignore +func (e *neighborEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.neighborEntryEntry) + stateSinkObject.Save(1, &e.cache) + stateSinkObject.Save(2, &e.nudState) + stateSinkObject.Save(3, &e.mu) +} + +func (e *neighborEntry) afterLoad(context.Context) {} + +// +checklocksignore +func (e *neighborEntry) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.neighborEntryEntry) + stateSourceObject.Load(1, &e.cache) + stateSourceObject.Load(2, &e.nudState) + stateSourceObject.Load(3, &e.mu) +} + func (l *neighborEntryList) StateTypeName() string { return "pkg/tcpip/stack.neighborEntryList" } @@ -1071,6 +1174,7 @@ func (n *nic) StateFields() []string { "qDisc", "deliverLinkPackets", "Primary", + "experimentIPOptionEnabled", } } @@ -1096,6 +1200,7 @@ func (n *nic) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(14, &n.qDisc) stateSinkObject.Save(15, &n.deliverLinkPackets) stateSinkObject.Save(16, &n.Primary) + stateSinkObject.Save(17, 
&n.experimentIPOptionEnabled) } func (n *nic) afterLoad(context.Context) {} @@ -1119,6 +1224,7 @@ func (n *nic) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(14, &n.qDisc) stateSourceObject.Load(15, &n.deliverLinkPackets) stateSourceObject.Load(16, &n.Primary) + stateSourceObject.Load(17, &n.experimentIPOptionEnabled) } func (p *packetEndpointList) StateTypeName() string { @@ -1636,34 +1742,6 @@ func (p *pendingPacket) StateLoad(ctx context.Context, stateSourceObject state.S stateSourceObject.Load(1, &p.pkt) } -func (p *packetsPendingLinkResolutionMu) StateTypeName() string { - return "pkg/tcpip/stack.packetsPendingLinkResolutionMu" -} - -func (p *packetsPendingLinkResolutionMu) StateFields() []string { - return []string{ - "packets", - "cancelChans", - } -} - -func (p *packetsPendingLinkResolutionMu) beforeSave() {} - -// +checklocksignore -func (p *packetsPendingLinkResolutionMu) StateSave(stateSinkObject state.Sink) { - p.beforeSave() - stateSinkObject.Save(0, &p.packets) - stateSinkObject.Save(1, &p.cancelChans) -} - -func (p *packetsPendingLinkResolutionMu) afterLoad(context.Context) {} - -// +checklocksignore -func (p *packetsPendingLinkResolutionMu) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &p.packets) - stateSourceObject.Load(1, &p.cancelChans) -} - func (f *packetsPendingLinkResolution) StateTypeName() string { return "pkg/tcpip/stack.packetsPendingLinkResolution" } @@ -1671,7 +1749,6 @@ func (f *packetsPendingLinkResolution) StateTypeName() string { func (f *packetsPendingLinkResolution) StateFields() []string { return []string{ "nic", - "mu", } } @@ -1681,7 +1758,6 @@ func (f *packetsPendingLinkResolution) beforeSave() {} func (f *packetsPendingLinkResolution) StateSave(stateSinkObject state.Sink) { f.beforeSave() stateSinkObject.Save(0, &f.nic) - stateSinkObject.Save(1, &f.mu) } func (f *packetsPendingLinkResolution) afterLoad(context.Context) {} @@ -1689,7 
+1765,6 @@ func (f *packetsPendingLinkResolution) afterLoad(context.Context) {} // +checklocksignore func (f *packetsPendingLinkResolution) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(0, &f.nic) - stateSourceObject.Load(1, &f.mu) } func (t *TransportEndpointID) StateTypeName() string { @@ -1733,6 +1808,7 @@ func (n *NetworkPacketInfo) StateTypeName() string { func (n *NetworkPacketInfo) StateFields() []string { return []string{ "LocalAddressBroadcast", + "LocalAddressTemporary", "IsForwardedPacket", } } @@ -1743,7 +1819,8 @@ func (n *NetworkPacketInfo) beforeSave() {} func (n *NetworkPacketInfo) StateSave(stateSinkObject state.Sink) { n.beforeSave() stateSinkObject.Save(0, &n.LocalAddressBroadcast) - stateSinkObject.Save(1, &n.IsForwardedPacket) + stateSinkObject.Save(1, &n.LocalAddressTemporary) + stateSinkObject.Save(2, &n.IsForwardedPacket) } func (n *NetworkPacketInfo) afterLoad(context.Context) {} @@ -1751,7 +1828,54 @@ func (n *NetworkPacketInfo) afterLoad(context.Context) {} // +checklocksignore func (n *NetworkPacketInfo) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(0, &n.LocalAddressBroadcast) - stateSourceObject.Load(1, &n.IsForwardedPacket) + stateSourceObject.Load(1, &n.LocalAddressTemporary) + stateSourceObject.Load(2, &n.IsForwardedPacket) +} + +func (p *PacketMMapOpts) StateTypeName() string { + return "pkg/tcpip/stack.PacketMMapOpts" +} + +func (p *PacketMMapOpts) StateFields() []string { + return []string{ + "Req", + "IsRx", + "Cooked", + "Stack", + "Wq", + "PacketEndpoint", + "Version", + "Reserve", + } +} + +func (p *PacketMMapOpts) beforeSave() {} + +// +checklocksignore +func (p *PacketMMapOpts) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.Req) + stateSinkObject.Save(1, &p.IsRx) + stateSinkObject.Save(2, &p.Cooked) + stateSinkObject.Save(3, &p.Stack) + stateSinkObject.Save(4, &p.Wq) + stateSinkObject.Save(5, 
&p.PacketEndpoint) + stateSinkObject.Save(6, &p.Version) + stateSinkObject.Save(7, &p.Reserve) +} + +func (p *PacketMMapOpts) afterLoad(context.Context) {} + +// +checklocksignore +func (p *PacketMMapOpts) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.Req) + stateSourceObject.Load(1, &p.IsRx) + stateSourceObject.Load(2, &p.Cooked) + stateSourceObject.Load(3, &p.Stack) + stateSourceObject.Load(4, &p.Wq) + stateSourceObject.Load(5, &p.PacketEndpoint) + stateSourceObject.Load(6, &p.Version) + stateSourceObject.Load(7, &p.Reserve) } func (lifetimes *AddressLifetimes) StateTypeName() string { @@ -1994,14 +2118,12 @@ func (s *Stack) StateFields() []string { "packetEndpointWriteSupported", "demux", "stats", - "routeTable", - "nics", "defaultForwardingEnabled", - "nicIDGen", "cleanupEndpoints", "PortManager", "clock", "handleLocal", + "nftablesConfigured", "restoredEndpoints", "resumableEndpoints", "icmpRateLimiter", @@ -2012,11 +2134,11 @@ func (s *Stack) StateFields() []string { "receiveBufferSize", "tcpInvalidRateLimit", "tsOffsetSecret", + "saveRestoreEnabled", + "externalNetworkingDisabled", } } -func (s *Stack) beforeSave() {} - // +checklocksignore func (s *Stack) StateSave(stateSinkObject state.Sink) { s.beforeSave() @@ -2026,28 +2148,26 @@ func (s *Stack) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(3, &s.packetEndpointWriteSupported) stateSinkObject.Save(4, &s.demux) stateSinkObject.Save(5, &s.stats) - stateSinkObject.Save(6, &s.routeTable) - stateSinkObject.Save(7, &s.nics) - stateSinkObject.Save(8, &s.defaultForwardingEnabled) - stateSinkObject.Save(9, &s.nicIDGen) - stateSinkObject.Save(10, &s.cleanupEndpoints) - stateSinkObject.Save(11, &s.PortManager) - stateSinkObject.Save(12, &s.clock) - stateSinkObject.Save(13, &s.handleLocal) - stateSinkObject.Save(14, &s.restoredEndpoints) - stateSinkObject.Save(15, &s.resumableEndpoints) - stateSinkObject.Save(16, &s.icmpRateLimiter) - 
stateSinkObject.Save(17, &s.seed) - stateSinkObject.Save(18, &s.nudConfigs) - stateSinkObject.Save(19, &s.nudDisp) - stateSinkObject.Save(20, &s.sendBufferSize) - stateSinkObject.Save(21, &s.receiveBufferSize) - stateSinkObject.Save(22, &s.tcpInvalidRateLimit) - stateSinkObject.Save(23, &s.tsOffsetSecret) + stateSinkObject.Save(6, &s.defaultForwardingEnabled) + stateSinkObject.Save(7, &s.cleanupEndpoints) + stateSinkObject.Save(8, &s.PortManager) + stateSinkObject.Save(9, &s.clock) + stateSinkObject.Save(10, &s.handleLocal) + stateSinkObject.Save(11, &s.nftablesConfigured) + stateSinkObject.Save(12, &s.restoredEndpoints) + stateSinkObject.Save(13, &s.resumableEndpoints) + stateSinkObject.Save(14, &s.icmpRateLimiter) + stateSinkObject.Save(15, &s.seed) + stateSinkObject.Save(16, &s.nudConfigs) + stateSinkObject.Save(17, &s.nudDisp) + stateSinkObject.Save(18, &s.sendBufferSize) + stateSinkObject.Save(19, &s.receiveBufferSize) + stateSinkObject.Save(20, &s.tcpInvalidRateLimit) + stateSinkObject.Save(21, &s.tsOffsetSecret) + stateSinkObject.Save(22, &s.saveRestoreEnabled) + stateSinkObject.Save(23, &s.externalNetworkingDisabled) } -func (s *Stack) afterLoad(context.Context) {} - // +checklocksignore func (s *Stack) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(0, &s.transportProtocols) @@ -2056,24 +2176,25 @@ func (s *Stack) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(3, &s.packetEndpointWriteSupported) stateSourceObject.Load(4, &s.demux) stateSourceObject.Load(5, &s.stats) - stateSourceObject.Load(6, &s.routeTable) - stateSourceObject.Load(7, &s.nics) - stateSourceObject.Load(8, &s.defaultForwardingEnabled) - stateSourceObject.Load(9, &s.nicIDGen) - stateSourceObject.Load(10, &s.cleanupEndpoints) - stateSourceObject.Load(11, &s.PortManager) - stateSourceObject.Load(12, &s.clock) - stateSourceObject.Load(13, &s.handleLocal) - stateSourceObject.Load(14, &s.restoredEndpoints) - 
stateSourceObject.Load(15, &s.resumableEndpoints) - stateSourceObject.Load(16, &s.icmpRateLimiter) - stateSourceObject.Load(17, &s.seed) - stateSourceObject.Load(18, &s.nudConfigs) - stateSourceObject.Load(19, &s.nudDisp) - stateSourceObject.Load(20, &s.sendBufferSize) - stateSourceObject.Load(21, &s.receiveBufferSize) - stateSourceObject.Load(22, &s.tcpInvalidRateLimit) - stateSourceObject.Load(23, &s.tsOffsetSecret) + stateSourceObject.Load(6, &s.defaultForwardingEnabled) + stateSourceObject.Load(7, &s.cleanupEndpoints) + stateSourceObject.Load(8, &s.PortManager) + stateSourceObject.Load(9, &s.clock) + stateSourceObject.Load(10, &s.handleLocal) + stateSourceObject.Load(11, &s.nftablesConfigured) + stateSourceObject.Load(12, &s.restoredEndpoints) + stateSourceObject.Load(13, &s.resumableEndpoints) + stateSourceObject.Load(14, &s.icmpRateLimiter) + stateSourceObject.Load(15, &s.seed) + stateSourceObject.Load(16, &s.nudConfigs) + stateSourceObject.Load(17, &s.nudDisp) + stateSourceObject.Load(18, &s.sendBufferSize) + stateSourceObject.Load(19, &s.receiveBufferSize) + stateSourceObject.Load(20, &s.tcpInvalidRateLimit) + stateSourceObject.Load(21, &s.tsOffsetSecret) + stateSourceObject.Load(22, &s.saveRestoreEnabled) + stateSourceObject.Load(23, &s.externalNetworkingDisabled) + stateSourceObject.AfterLoad(func() { s.afterLoad(ctx) }) } func (t *TransportEndpointInfo) StateTypeName() string { @@ -2116,586 +2237,6 @@ func (t *TransportEndpointInfo) StateLoad(ctx context.Context, stateSourceObject stateSourceObject.Load(5, &t.RegisterNICID) } -func (t *TCPCubicState) StateTypeName() string { - return "pkg/tcpip/stack.TCPCubicState" -} - -func (t *TCPCubicState) StateFields() []string { - return []string{ - "WLastMax", - "WMax", - "T", - "TimeSinceLastCongestion", - "C", - "K", - "Beta", - "WC", - "WEst", - "EndSeq", - "CurrRTT", - "LastRTT", - "SampleCount", - "LastAck", - "RoundStart", - } -} - -func (t *TCPCubicState) beforeSave() {} - -// +checklocksignore -func (t 
*TCPCubicState) StateSave(stateSinkObject state.Sink) { - t.beforeSave() - stateSinkObject.Save(0, &t.WLastMax) - stateSinkObject.Save(1, &t.WMax) - stateSinkObject.Save(2, &t.T) - stateSinkObject.Save(3, &t.TimeSinceLastCongestion) - stateSinkObject.Save(4, &t.C) - stateSinkObject.Save(5, &t.K) - stateSinkObject.Save(6, &t.Beta) - stateSinkObject.Save(7, &t.WC) - stateSinkObject.Save(8, &t.WEst) - stateSinkObject.Save(9, &t.EndSeq) - stateSinkObject.Save(10, &t.CurrRTT) - stateSinkObject.Save(11, &t.LastRTT) - stateSinkObject.Save(12, &t.SampleCount) - stateSinkObject.Save(13, &t.LastAck) - stateSinkObject.Save(14, &t.RoundStart) -} - -func (t *TCPCubicState) afterLoad(context.Context) {} - -// +checklocksignore -func (t *TCPCubicState) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &t.WLastMax) - stateSourceObject.Load(1, &t.WMax) - stateSourceObject.Load(2, &t.T) - stateSourceObject.Load(3, &t.TimeSinceLastCongestion) - stateSourceObject.Load(4, &t.C) - stateSourceObject.Load(5, &t.K) - stateSourceObject.Load(6, &t.Beta) - stateSourceObject.Load(7, &t.WC) - stateSourceObject.Load(8, &t.WEst) - stateSourceObject.Load(9, &t.EndSeq) - stateSourceObject.Load(10, &t.CurrRTT) - stateSourceObject.Load(11, &t.LastRTT) - stateSourceObject.Load(12, &t.SampleCount) - stateSourceObject.Load(13, &t.LastAck) - stateSourceObject.Load(14, &t.RoundStart) -} - -func (t *TCPRACKState) StateTypeName() string { - return "pkg/tcpip/stack.TCPRACKState" -} - -func (t *TCPRACKState) StateFields() []string { - return []string{ - "XmitTime", - "EndSequence", - "FACK", - "RTT", - "Reord", - "DSACKSeen", - "ReoWnd", - "ReoWndIncr", - "ReoWndPersist", - "RTTSeq", - } -} - -func (t *TCPRACKState) beforeSave() {} - -// +checklocksignore -func (t *TCPRACKState) StateSave(stateSinkObject state.Sink) { - t.beforeSave() - stateSinkObject.Save(0, &t.XmitTime) - stateSinkObject.Save(1, &t.EndSequence) - stateSinkObject.Save(2, &t.FACK) - 
stateSinkObject.Save(3, &t.RTT) - stateSinkObject.Save(4, &t.Reord) - stateSinkObject.Save(5, &t.DSACKSeen) - stateSinkObject.Save(6, &t.ReoWnd) - stateSinkObject.Save(7, &t.ReoWndIncr) - stateSinkObject.Save(8, &t.ReoWndPersist) - stateSinkObject.Save(9, &t.RTTSeq) -} - -func (t *TCPRACKState) afterLoad(context.Context) {} - -// +checklocksignore -func (t *TCPRACKState) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &t.XmitTime) - stateSourceObject.Load(1, &t.EndSequence) - stateSourceObject.Load(2, &t.FACK) - stateSourceObject.Load(3, &t.RTT) - stateSourceObject.Load(4, &t.Reord) - stateSourceObject.Load(5, &t.DSACKSeen) - stateSourceObject.Load(6, &t.ReoWnd) - stateSourceObject.Load(7, &t.ReoWndIncr) - stateSourceObject.Load(8, &t.ReoWndPersist) - stateSourceObject.Load(9, &t.RTTSeq) -} - -func (t *TCPEndpointID) StateTypeName() string { - return "pkg/tcpip/stack.TCPEndpointID" -} - -func (t *TCPEndpointID) StateFields() []string { - return []string{ - "LocalPort", - "LocalAddress", - "RemotePort", - "RemoteAddress", - } -} - -func (t *TCPEndpointID) beforeSave() {} - -// +checklocksignore -func (t *TCPEndpointID) StateSave(stateSinkObject state.Sink) { - t.beforeSave() - stateSinkObject.Save(0, &t.LocalPort) - stateSinkObject.Save(1, &t.LocalAddress) - stateSinkObject.Save(2, &t.RemotePort) - stateSinkObject.Save(3, &t.RemoteAddress) -} - -func (t *TCPEndpointID) afterLoad(context.Context) {} - -// +checklocksignore -func (t *TCPEndpointID) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &t.LocalPort) - stateSourceObject.Load(1, &t.LocalAddress) - stateSourceObject.Load(2, &t.RemotePort) - stateSourceObject.Load(3, &t.RemoteAddress) -} - -func (t *TCPFastRecoveryState) StateTypeName() string { - return "pkg/tcpip/stack.TCPFastRecoveryState" -} - -func (t *TCPFastRecoveryState) StateFields() []string { - return []string{ - "Active", - "First", - "Last", - "MaxCwnd", - 
"HighRxt", - "RescueRxt", - } -} - -func (t *TCPFastRecoveryState) beforeSave() {} - -// +checklocksignore -func (t *TCPFastRecoveryState) StateSave(stateSinkObject state.Sink) { - t.beforeSave() - stateSinkObject.Save(0, &t.Active) - stateSinkObject.Save(1, &t.First) - stateSinkObject.Save(2, &t.Last) - stateSinkObject.Save(3, &t.MaxCwnd) - stateSinkObject.Save(4, &t.HighRxt) - stateSinkObject.Save(5, &t.RescueRxt) -} - -func (t *TCPFastRecoveryState) afterLoad(context.Context) {} - -// +checklocksignore -func (t *TCPFastRecoveryState) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &t.Active) - stateSourceObject.Load(1, &t.First) - stateSourceObject.Load(2, &t.Last) - stateSourceObject.Load(3, &t.MaxCwnd) - stateSourceObject.Load(4, &t.HighRxt) - stateSourceObject.Load(5, &t.RescueRxt) -} - -func (t *TCPReceiverState) StateTypeName() string { - return "pkg/tcpip/stack.TCPReceiverState" -} - -func (t *TCPReceiverState) StateFields() []string { - return []string{ - "RcvNxt", - "RcvAcc", - "RcvWndScale", - "PendingBufUsed", - } -} - -func (t *TCPReceiverState) beforeSave() {} - -// +checklocksignore -func (t *TCPReceiverState) StateSave(stateSinkObject state.Sink) { - t.beforeSave() - stateSinkObject.Save(0, &t.RcvNxt) - stateSinkObject.Save(1, &t.RcvAcc) - stateSinkObject.Save(2, &t.RcvWndScale) - stateSinkObject.Save(3, &t.PendingBufUsed) -} - -func (t *TCPReceiverState) afterLoad(context.Context) {} - -// +checklocksignore -func (t *TCPReceiverState) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &t.RcvNxt) - stateSourceObject.Load(1, &t.RcvAcc) - stateSourceObject.Load(2, &t.RcvWndScale) - stateSourceObject.Load(3, &t.PendingBufUsed) -} - -func (t *TCPRTTState) StateTypeName() string { - return "pkg/tcpip/stack.TCPRTTState" -} - -func (t *TCPRTTState) StateFields() []string { - return []string{ - "SRTT", - "RTTVar", - "SRTTInited", - } -} - -func (t *TCPRTTState) 
beforeSave() {} - -// +checklocksignore -func (t *TCPRTTState) StateSave(stateSinkObject state.Sink) { - t.beforeSave() - stateSinkObject.Save(0, &t.SRTT) - stateSinkObject.Save(1, &t.RTTVar) - stateSinkObject.Save(2, &t.SRTTInited) -} - -func (t *TCPRTTState) afterLoad(context.Context) {} - -// +checklocksignore -func (t *TCPRTTState) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &t.SRTT) - stateSourceObject.Load(1, &t.RTTVar) - stateSourceObject.Load(2, &t.SRTTInited) -} - -func (t *TCPSenderState) StateTypeName() string { - return "pkg/tcpip/stack.TCPSenderState" -} - -func (t *TCPSenderState) StateFields() []string { - return []string{ - "LastSendTime", - "DupAckCount", - "SndCwnd", - "Ssthresh", - "SndCAAckCount", - "Outstanding", - "SackedOut", - "SndWnd", - "SndUna", - "SndNxt", - "RTTMeasureSeqNum", - "RTTMeasureTime", - "Closed", - "RTO", - "RTTState", - "MaxPayloadSize", - "SndWndScale", - "MaxSentAck", - "FastRecovery", - "Cubic", - "RACKState", - "RetransmitTS", - "SpuriousRecovery", - } -} - -func (t *TCPSenderState) beforeSave() {} - -// +checklocksignore -func (t *TCPSenderState) StateSave(stateSinkObject state.Sink) { - t.beforeSave() - stateSinkObject.Save(0, &t.LastSendTime) - stateSinkObject.Save(1, &t.DupAckCount) - stateSinkObject.Save(2, &t.SndCwnd) - stateSinkObject.Save(3, &t.Ssthresh) - stateSinkObject.Save(4, &t.SndCAAckCount) - stateSinkObject.Save(5, &t.Outstanding) - stateSinkObject.Save(6, &t.SackedOut) - stateSinkObject.Save(7, &t.SndWnd) - stateSinkObject.Save(8, &t.SndUna) - stateSinkObject.Save(9, &t.SndNxt) - stateSinkObject.Save(10, &t.RTTMeasureSeqNum) - stateSinkObject.Save(11, &t.RTTMeasureTime) - stateSinkObject.Save(12, &t.Closed) - stateSinkObject.Save(13, &t.RTO) - stateSinkObject.Save(14, &t.RTTState) - stateSinkObject.Save(15, &t.MaxPayloadSize) - stateSinkObject.Save(16, &t.SndWndScale) - stateSinkObject.Save(17, &t.MaxSentAck) - stateSinkObject.Save(18, &t.FastRecovery) - 
stateSinkObject.Save(19, &t.Cubic) - stateSinkObject.Save(20, &t.RACKState) - stateSinkObject.Save(21, &t.RetransmitTS) - stateSinkObject.Save(22, &t.SpuriousRecovery) -} - -func (t *TCPSenderState) afterLoad(context.Context) {} - -// +checklocksignore -func (t *TCPSenderState) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &t.LastSendTime) - stateSourceObject.Load(1, &t.DupAckCount) - stateSourceObject.Load(2, &t.SndCwnd) - stateSourceObject.Load(3, &t.Ssthresh) - stateSourceObject.Load(4, &t.SndCAAckCount) - stateSourceObject.Load(5, &t.Outstanding) - stateSourceObject.Load(6, &t.SackedOut) - stateSourceObject.Load(7, &t.SndWnd) - stateSourceObject.Load(8, &t.SndUna) - stateSourceObject.Load(9, &t.SndNxt) - stateSourceObject.Load(10, &t.RTTMeasureSeqNum) - stateSourceObject.Load(11, &t.RTTMeasureTime) - stateSourceObject.Load(12, &t.Closed) - stateSourceObject.Load(13, &t.RTO) - stateSourceObject.Load(14, &t.RTTState) - stateSourceObject.Load(15, &t.MaxPayloadSize) - stateSourceObject.Load(16, &t.SndWndScale) - stateSourceObject.Load(17, &t.MaxSentAck) - stateSourceObject.Load(18, &t.FastRecovery) - stateSourceObject.Load(19, &t.Cubic) - stateSourceObject.Load(20, &t.RACKState) - stateSourceObject.Load(21, &t.RetransmitTS) - stateSourceObject.Load(22, &t.SpuriousRecovery) -} - -func (t *TCPSACKInfo) StateTypeName() string { - return "pkg/tcpip/stack.TCPSACKInfo" -} - -func (t *TCPSACKInfo) StateFields() []string { - return []string{ - "Blocks", - "ReceivedBlocks", - "MaxSACKED", - } -} - -func (t *TCPSACKInfo) beforeSave() {} - -// +checklocksignore -func (t *TCPSACKInfo) StateSave(stateSinkObject state.Sink) { - t.beforeSave() - stateSinkObject.Save(0, &t.Blocks) - stateSinkObject.Save(1, &t.ReceivedBlocks) - stateSinkObject.Save(2, &t.MaxSACKED) -} - -func (t *TCPSACKInfo) afterLoad(context.Context) {} - -// +checklocksignore -func (t *TCPSACKInfo) StateLoad(ctx context.Context, stateSourceObject state.Source) { - 
stateSourceObject.Load(0, &t.Blocks) - stateSourceObject.Load(1, &t.ReceivedBlocks) - stateSourceObject.Load(2, &t.MaxSACKED) -} - -func (r *RcvBufAutoTuneParams) StateTypeName() string { - return "pkg/tcpip/stack.RcvBufAutoTuneParams" -} - -func (r *RcvBufAutoTuneParams) StateFields() []string { - return []string{ - "MeasureTime", - "CopiedBytes", - "PrevCopiedBytes", - "RcvBufSize", - "RTT", - "RTTVar", - "RTTMeasureSeqNumber", - "RTTMeasureTime", - "Disabled", - } -} - -func (r *RcvBufAutoTuneParams) beforeSave() {} - -// +checklocksignore -func (r *RcvBufAutoTuneParams) StateSave(stateSinkObject state.Sink) { - r.beforeSave() - stateSinkObject.Save(0, &r.MeasureTime) - stateSinkObject.Save(1, &r.CopiedBytes) - stateSinkObject.Save(2, &r.PrevCopiedBytes) - stateSinkObject.Save(3, &r.RcvBufSize) - stateSinkObject.Save(4, &r.RTT) - stateSinkObject.Save(5, &r.RTTVar) - stateSinkObject.Save(6, &r.RTTMeasureSeqNumber) - stateSinkObject.Save(7, &r.RTTMeasureTime) - stateSinkObject.Save(8, &r.Disabled) -} - -func (r *RcvBufAutoTuneParams) afterLoad(context.Context) {} - -// +checklocksignore -func (r *RcvBufAutoTuneParams) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &r.MeasureTime) - stateSourceObject.Load(1, &r.CopiedBytes) - stateSourceObject.Load(2, &r.PrevCopiedBytes) - stateSourceObject.Load(3, &r.RcvBufSize) - stateSourceObject.Load(4, &r.RTT) - stateSourceObject.Load(5, &r.RTTVar) - stateSourceObject.Load(6, &r.RTTMeasureSeqNumber) - stateSourceObject.Load(7, &r.RTTMeasureTime) - stateSourceObject.Load(8, &r.Disabled) -} - -func (t *TCPRcvBufState) StateTypeName() string { - return "pkg/tcpip/stack.TCPRcvBufState" -} - -func (t *TCPRcvBufState) StateFields() []string { - return []string{ - "RcvBufUsed", - "RcvAutoParams", - "RcvClosed", - } -} - -func (t *TCPRcvBufState) beforeSave() {} - -// +checklocksignore -func (t *TCPRcvBufState) StateSave(stateSinkObject state.Sink) { - t.beforeSave() - 
stateSinkObject.Save(0, &t.RcvBufUsed) - stateSinkObject.Save(1, &t.RcvAutoParams) - stateSinkObject.Save(2, &t.RcvClosed) -} - -func (t *TCPRcvBufState) afterLoad(context.Context) {} - -// +checklocksignore -func (t *TCPRcvBufState) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &t.RcvBufUsed) - stateSourceObject.Load(1, &t.RcvAutoParams) - stateSourceObject.Load(2, &t.RcvClosed) -} - -func (t *TCPSndBufState) StateTypeName() string { - return "pkg/tcpip/stack.TCPSndBufState" -} - -func (t *TCPSndBufState) StateFields() []string { - return []string{ - "SndBufSize", - "SndBufUsed", - "SndClosed", - "PacketTooBigCount", - "SndMTU", - "AutoTuneSndBufDisabled", - } -} - -func (t *TCPSndBufState) beforeSave() {} - -// +checklocksignore -func (t *TCPSndBufState) StateSave(stateSinkObject state.Sink) { - t.beforeSave() - stateSinkObject.Save(0, &t.SndBufSize) - stateSinkObject.Save(1, &t.SndBufUsed) - stateSinkObject.Save(2, &t.SndClosed) - stateSinkObject.Save(3, &t.PacketTooBigCount) - stateSinkObject.Save(4, &t.SndMTU) - stateSinkObject.Save(5, &t.AutoTuneSndBufDisabled) -} - -func (t *TCPSndBufState) afterLoad(context.Context) {} - -// +checklocksignore -func (t *TCPSndBufState) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &t.SndBufSize) - stateSourceObject.Load(1, &t.SndBufUsed) - stateSourceObject.Load(2, &t.SndClosed) - stateSourceObject.Load(3, &t.PacketTooBigCount) - stateSourceObject.Load(4, &t.SndMTU) - stateSourceObject.Load(5, &t.AutoTuneSndBufDisabled) -} - -func (t *TCPEndpointStateInner) StateTypeName() string { - return "pkg/tcpip/stack.TCPEndpointStateInner" -} - -func (t *TCPEndpointStateInner) StateFields() []string { - return []string{ - "TSOffset", - "SACKPermitted", - "SendTSOk", - "RecentTS", - } -} - -func (t *TCPEndpointStateInner) beforeSave() {} - -// +checklocksignore -func (t *TCPEndpointStateInner) StateSave(stateSinkObject state.Sink) { - 
t.beforeSave() - stateSinkObject.Save(0, &t.TSOffset) - stateSinkObject.Save(1, &t.SACKPermitted) - stateSinkObject.Save(2, &t.SendTSOk) - stateSinkObject.Save(3, &t.RecentTS) -} - -func (t *TCPEndpointStateInner) afterLoad(context.Context) {} - -// +checklocksignore -func (t *TCPEndpointStateInner) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &t.TSOffset) - stateSourceObject.Load(1, &t.SACKPermitted) - stateSourceObject.Load(2, &t.SendTSOk) - stateSourceObject.Load(3, &t.RecentTS) -} - -func (t *TCPEndpointState) StateTypeName() string { - return "pkg/tcpip/stack.TCPEndpointState" -} - -func (t *TCPEndpointState) StateFields() []string { - return []string{ - "TCPEndpointStateInner", - "ID", - "SegTime", - "RcvBufState", - "SndBufState", - "SACK", - "Receiver", - "Sender", - } -} - -func (t *TCPEndpointState) beforeSave() {} - -// +checklocksignore -func (t *TCPEndpointState) StateSave(stateSinkObject state.Sink) { - t.beforeSave() - stateSinkObject.Save(0, &t.TCPEndpointStateInner) - stateSinkObject.Save(1, &t.ID) - stateSinkObject.Save(2, &t.SegTime) - stateSinkObject.Save(3, &t.RcvBufState) - stateSinkObject.Save(4, &t.SndBufState) - stateSinkObject.Save(5, &t.SACK) - stateSinkObject.Save(6, &t.Receiver) - stateSinkObject.Save(7, &t.Sender) -} - -func (t *TCPEndpointState) afterLoad(context.Context) {} - -// +checklocksignore -func (t *TCPEndpointState) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &t.TCPEndpointStateInner) - stateSourceObject.Load(1, &t.ID) - stateSourceObject.Load(2, &t.SegTime) - stateSourceObject.Load(3, &t.RcvBufState) - stateSourceObject.Load(4, &t.SndBufState) - stateSourceObject.Load(5, &t.SACK) - stateSourceObject.Load(6, &t.Receiver) - stateSourceObject.Load(7, &t.Sender) -} - func (p *protocolIDs) StateTypeName() string { return "pkg/tcpip/stack.protocolIDs" } @@ -2908,7 +2449,6 @@ func init() { state.Register((*addressStateRefs)(nil)) 
state.Register((*AddressableEndpointState)(nil)) state.Register((*AddressableEndpointStateOptions)(nil)) - state.Register((*addressState)(nil)) state.Register((*bridgePort)(nil)) state.Register((*BridgeEndpoint)(nil)) state.Register((*tuple)(nil)) @@ -2928,6 +2468,7 @@ func init() { state.Register((*RedirectTarget)(nil)) state.Register((*SNATTarget)(nil)) state.Register((*MasqueradeTarget)(nil)) + state.Register((*CTTarget)(nil)) state.Register((*IPTables)(nil)) state.Register((*Table)(nil)) state.Register((*Rule)(nil)) @@ -2935,6 +2476,10 @@ func init() { state.Register((*dynamicCacheEntry)(nil)) state.Register((*neighborCacheMu)(nil)) state.Register((*neighborCache)(nil)) + state.Register((*NeighborEntry)(nil)) + state.Register((*timer)(nil)) + state.Register((*neighborEntryMu)(nil)) + state.Register((*neighborEntry)(nil)) state.Register((*neighborEntryList)(nil)) state.Register((*neighborEntryEntry)(nil)) state.Register((*linkResolver)(nil)) @@ -2954,10 +2499,10 @@ func init() { state.Register((*PacketBufferList)(nil)) state.Register((*packetBufferRefs)(nil)) state.Register((*pendingPacket)(nil)) - state.Register((*packetsPendingLinkResolutionMu)(nil)) state.Register((*packetsPendingLinkResolution)(nil)) state.Register((*TransportEndpointID)(nil)) state.Register((*NetworkPacketInfo)(nil)) + state.Register((*PacketMMapOpts)(nil)) state.Register((*AddressLifetimes)(nil)) state.Register((*UnicastSourceAndMulticastDestination)(nil)) state.Register((*DADConfigurations)(nil)) @@ -2968,19 +2513,6 @@ func init() { state.Register((*transportProtocolState)(nil)) state.Register((*Stack)(nil)) state.Register((*TransportEndpointInfo)(nil)) - state.Register((*TCPCubicState)(nil)) - state.Register((*TCPRACKState)(nil)) - state.Register((*TCPEndpointID)(nil)) - state.Register((*TCPFastRecoveryState)(nil)) - state.Register((*TCPReceiverState)(nil)) - state.Register((*TCPRTTState)(nil)) - state.Register((*TCPSenderState)(nil)) - state.Register((*TCPSACKInfo)(nil)) - 
state.Register((*RcvBufAutoTuneParams)(nil)) - state.Register((*TCPRcvBufState)(nil)) - state.Register((*TCPSndBufState)(nil)) - state.Register((*TCPEndpointStateInner)(nil)) - state.Register((*TCPEndpointState)(nil)) state.Register((*protocolIDs)(nil)) state.Register((*transportEndpoints)(nil)) state.Register((*endpointsByNIC)(nil)) diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/state_conn_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/state_conn_mutex.go index 6f9075b5..9cb8634f 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/state_conn_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/state_conn_mutex.go @@ -92,5 +92,5 @@ func stateConninitLockNames() {} func init() { stateConninitLockNames() - stateConnprefixIndex = locking.NewMutexClass(reflect.TypeOf(stateConnRWMutex{}), stateConnlockNames) + stateConnprefixIndex = locking.NewMutexClass(reflect.TypeFor[stateConnRWMutex](), stateConnlockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/transport_endpoints_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/transport_endpoints_mutex.go index cb6f13d7..acde7844 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/transport_endpoints_mutex.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/transport_endpoints_mutex.go @@ -92,5 +92,5 @@ func transportEndpointsinitLockNames() {} func init() { transportEndpointsinitLockNames() - transportEndpointsprefixIndex = locking.NewMutexClass(reflect.TypeOf(transportEndpointsRWMutex{}), transportEndpointslockNames) + transportEndpointsprefixIndex = locking.NewMutexClass(reflect.TypeFor[transportEndpointsRWMutex](), transportEndpointslockNames) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stdclock.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/stdclock.go index e80e7c4b..cc3397ca 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stdclock.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/stdclock.go @@ -91,7 +91,6 @@ func (*stdClock) AfterFunc(d time.Duration, f func()) Timer { } } -// +stateify 
savable type stdTimer struct { t *time.Timer } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/tcpip.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/tcpip.go index b8948173..f9ba7e50 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/tcpip.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/tcpip.go @@ -35,7 +35,6 @@ import ( "io" "math" "math/bits" - "math/rand" "net" "reflect" "strconv" @@ -43,16 +42,15 @@ import ( "time" "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) // Using the header package here would cause an import cycle. const ( - ipv4AddressSize = 4 - ipv4ProtocolNumber = 0x0800 - ipv6AddressSize = 16 - ipv6ProtocolNumber = 0x86dd + ipv4AddressSize = 4 + ipv6AddressSize = 16 ) const ( @@ -712,6 +710,10 @@ type ReadOptions struct { // NeedLinkPacketInfo indicates whether to return the link-layer information, // if supported. NeedLinkPacketInfo bool + + // NeedRecvdExperimentOption indicates whether to return the experiment + // option value from the last received packet, if supported. + NeedReceivedExperimentOption bool } // ReadResult represents result for a successful Endpoint.Read. @@ -732,6 +734,10 @@ type ReadResult struct { // LinkPacketInfo is the link-layer information of the received packet if // ReadOptions.NeedLinkPacketInfo is true. LinkPacketInfo LinkPacketInfo + + // ReceivedExperimentOption is the experiment option value from the last + // received packet if ReadOptions.NeedReceivedExperimentOption is true. + ReceivedExperimentOption uint16 } // Endpoint is the interface implemented by transport protocols (e.g., tcp, udp) @@ -949,9 +955,8 @@ const ( // MTUDiscoverOption is used to set/get the path MTU discovery setting. // - // NOTE: Setting this option to any other value than PMTUDiscoveryDont - // is not supported and will fail as such, and getting this option will - // always return PMTUDiscoveryDont. 
+ // The value controls whether the Don't Fragment (DF) bit is set on + // outgoing IPv4 packets. MTUDiscoverOption // MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control @@ -996,6 +1001,17 @@ const ( // IPv6Checksum is used to request the stack to populate and validate the IPv6 // checksum for transport level headers. IPv6Checksum + + // PacketMMapVersionOption is used to set the packet mmap version. + PacketMMapVersionOption + + // PacketMMapReserveOption is used to set the packet mmap reserved space + // between the aligned header and the payload. + PacketMMapReserveOption + + // IPv6MulticastInterfaceOption is used to set/get the NIC used for + // IPv6 multicast Tx. + IPv6MulticastInterfaceOption ) const ( @@ -1185,6 +1201,30 @@ func (*ICMPv6Filter) isGettableSocketOption() {} func (*ICMPv6Filter) isSettableSocketOption() {} +// TpacketReq is the tpacket_req structure as described in +// https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt +// +// +stateify savable +type TpacketReq struct { + TpBlockSize uint32 + TpBlockNr uint32 + TpFrameSize uint32 + TpFrameNr uint32 +} + +func (*TpacketReq) isSettableSocketOption() {} + +// TpacketStats is the statistics for a packet_mmap ring buffer from +// . +// +// +stateify savable +type TpacketStats struct { + Packets uint32 + Dropped uint32 +} + +func (*TpacketStats) isGettableSocketOption() {} + // EndpointState represents the state of an endpoint. type EndpointState uint8 @@ -1981,6 +2021,10 @@ type IPForwardingStats struct { // successfully forwarded. Errors *StatCounter + // OutgoingDeviceClosedForSend is the number of packets that were dropped due + // to the outgoing device being closed for send. 
+ OutgoingDeviceClosedForSend *StatCounter + // LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPForwardingStats) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/tcpip_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/tcpip_state_autogen.go index 7a75e886..481323d3 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/tcpip_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/tcpip_state_autogen.go @@ -932,6 +932,27 @@ func (e *ErrMulticastInputCannotBeOutput) afterLoad(context.Context) {} func (e *ErrMulticastInputCannotBeOutput) StateLoad(ctx context.Context, stateSourceObject state.Source) { } +func (e *ErrEndpointBusy) StateTypeName() string { + return "pkg/tcpip.ErrEndpointBusy" +} + +func (e *ErrEndpointBusy) StateFields() []string { + return []string{} +} + +func (e *ErrEndpointBusy) beforeSave() {} + +// +checklocksignore +func (e *ErrEndpointBusy) StateSave(stateSinkObject state.Sink) { + e.beforeSave() +} + +func (e *ErrEndpointBusy) afterLoad(context.Context) {} + +// +checklocksignore +func (e *ErrEndpointBusy) StateLoad(ctx context.Context, stateSourceObject state.Source) { +} + func (l *RouteList) StateTypeName() string { return "pkg/tcpip.RouteList" } @@ -1078,6 +1099,7 @@ func (so *SocketOptions) StateFields() []string { "receiveBufferSize", "linger", "rcvlowat", + "experimentOptionValue", } } @@ -1114,6 +1136,7 @@ func (so *SocketOptions) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(25, &so.receiveBufferSize) stateSinkObject.Save(26, &so.linger) stateSinkObject.Save(27, &so.rcvlowat) + stateSinkObject.Save(28, &so.experimentOptionValue) } func (so *SocketOptions) afterLoad(context.Context) {} @@ -1148,6 +1171,7 @@ func (so *SocketOptions) StateLoad(ctx context.Context, stateSourceObject state. 
stateSourceObject.Load(25, &so.receiveBufferSize) stateSourceObject.Load(26, &so.linger) stateSourceObject.Load(27, &so.rcvlowat) + stateSourceObject.Load(28, &so.experimentOptionValue) } func (l *LocalSockError) StateTypeName() string { @@ -1240,31 +1264,6 @@ func (s *stdClock) StateLoad(ctx context.Context, stateSourceObject state.Source stateSourceObject.AfterLoad(func() { s.afterLoad(ctx) }) } -func (st *stdTimer) StateTypeName() string { - return "pkg/tcpip.stdTimer" -} - -func (st *stdTimer) StateFields() []string { - return []string{ - "t", - } -} - -func (st *stdTimer) beforeSave() {} - -// +checklocksignore -func (st *stdTimer) StateSave(stateSinkObject state.Sink) { - st.beforeSave() - stateSinkObject.Save(0, &st.t) -} - -func (st *stdTimer) afterLoad(context.Context) {} - -// +checklocksignore -func (st *stdTimer) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &st.t) -} - func (mt *MonotonicTime) StateTypeName() string { return "pkg/tcpip.MonotonicTime" } @@ -1481,8 +1480,8 @@ func (c *ReceivableControlMessages) beforeSave() {} // +checklocksignore func (c *ReceivableControlMessages) StateSave(stateSinkObject state.Sink) { c.beforeSave() - var TimestampValue int64 - TimestampValue = c.saveTimestamp() + TimestampValue := c.saveTimestamp() + _ = (int64)(TimestampValue) stateSinkObject.SaveValue(0, TimestampValue) stateSinkObject.Save(1, &c.HasInq) stateSinkObject.Save(2, &c.Inq) @@ -1644,6 +1643,68 @@ func (f *ICMPv6Filter) StateLoad(ctx context.Context, stateSourceObject state.So stateSourceObject.Load(0, &f.DenyType) } +func (t *TpacketReq) StateTypeName() string { + return "pkg/tcpip.TpacketReq" +} + +func (t *TpacketReq) StateFields() []string { + return []string{ + "TpBlockSize", + "TpBlockNr", + "TpFrameSize", + "TpFrameNr", + } +} + +func (t *TpacketReq) beforeSave() {} + +// +checklocksignore +func (t *TpacketReq) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, 
&t.TpBlockSize) + stateSinkObject.Save(1, &t.TpBlockNr) + stateSinkObject.Save(2, &t.TpFrameSize) + stateSinkObject.Save(3, &t.TpFrameNr) +} + +func (t *TpacketReq) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TpacketReq) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.TpBlockSize) + stateSourceObject.Load(1, &t.TpBlockNr) + stateSourceObject.Load(2, &t.TpFrameSize) + stateSourceObject.Load(3, &t.TpFrameNr) +} + +func (t *TpacketStats) StateTypeName() string { + return "pkg/tcpip.TpacketStats" +} + +func (t *TpacketStats) StateFields() []string { + return []string{ + "Packets", + "Dropped", + } +} + +func (t *TpacketStats) beforeSave() {} + +// +checklocksignore +func (t *TpacketStats) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.Packets) + stateSinkObject.Save(1, &t.Dropped) +} + +func (t *TpacketStats) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TpacketStats) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.Packets) + stateSourceObject.Load(1, &t.Dropped) +} + func (l *LingerOption) StateTypeName() string { return "pkg/tcpip.LingerOption" } @@ -2362,6 +2423,7 @@ func (i *IPForwardingStats) StateFields() []string { "NoMulticastPendingQueueBufferSpace", "OutgoingDeviceNoBufferSpace", "Errors", + "OutgoingDeviceClosedForSend", } } @@ -2383,6 +2445,7 @@ func (i *IPForwardingStats) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(10, &i.NoMulticastPendingQueueBufferSpace) stateSinkObject.Save(11, &i.OutgoingDeviceNoBufferSpace) stateSinkObject.Save(12, &i.Errors) + stateSinkObject.Save(13, &i.OutgoingDeviceClosedForSend) } func (i *IPForwardingStats) afterLoad(context.Context) {} @@ -2402,6 +2465,7 @@ func (i *IPForwardingStats) StateLoad(ctx context.Context, stateSourceObject sta stateSourceObject.Load(10, &i.NoMulticastPendingQueueBufferSpace) stateSourceObject.Load(11, 
&i.OutgoingDeviceNoBufferSpace) stateSourceObject.Load(12, &i.Errors) + stateSourceObject.Load(13, &i.OutgoingDeviceClosedForSend) } func (i *IPStats) StateTypeName() string { @@ -3135,7 +3199,6 @@ func (j *jobInstance) StateTypeName() string { func (j *jobInstance) StateFields() []string { return []string{ - "timer", "earlyReturn", } } @@ -3145,16 +3208,14 @@ func (j *jobInstance) beforeSave() {} // +checklocksignore func (j *jobInstance) StateSave(stateSinkObject state.Sink) { j.beforeSave() - stateSinkObject.Save(0, &j.timer) - stateSinkObject.Save(1, &j.earlyReturn) + stateSinkObject.Save(0, &j.earlyReturn) } func (j *jobInstance) afterLoad(context.Context) {} // +checklocksignore func (j *jobInstance) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &j.timer) - stateSourceObject.Load(1, &j.earlyReturn) + stateSourceObject.Load(0, &j.earlyReturn) } func (j *Job) StateTypeName() string { @@ -3230,6 +3291,7 @@ func init() { state.Register((*ErrWouldBlock)(nil)) state.Register((*ErrMissingRequiredFields)(nil)) state.Register((*ErrMulticastInputCannotBeOutput)(nil)) + state.Register((*ErrEndpointBusy)(nil)) state.Register((*RouteList)(nil)) state.Register((*RouteEntry)(nil)) state.Register((*sockErrorList)(nil)) @@ -3238,7 +3300,6 @@ func init() { state.Register((*LocalSockError)(nil)) state.Register((*SockError)(nil)) state.Register((*stdClock)(nil)) - state.Register((*stdTimer)(nil)) state.Register((*MonotonicTime)(nil)) state.Register((*Address)(nil)) state.Register((*AddressMask)(nil)) @@ -3250,6 +3311,8 @@ func init() { state.Register((*TCPSendBufferSizeRangeOption)(nil)) state.Register((*TCPReceiveBufferSizeRangeOption)(nil)) state.Register((*ICMPv6Filter)(nil)) + state.Register((*TpacketReq)(nil)) + state.Register((*TpacketStats)(nil)) state.Register((*LingerOption)(nil)) state.Register((*IPPacketInfo)(nil)) state.Register((*IPv6PacketInfo)(nil)) diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/timer.go 
b/vendor/gvisor.dev/gvisor/pkg/tcpip/timer.go index 28bc2897..2f2bed18 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/timer.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/timer.go @@ -59,7 +59,7 @@ import ( // // +stateify savable type jobInstance struct { - timer Timer + timer Timer `state:"nosave"` // Used to inform the timer to early return when it gets stopped while the // lock the timer tries to obtain when fired is held (T1 is a goroutine that diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/endpoint.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/endpoint.go index 988604fc..c7924d53 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/endpoint.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/endpoint.go @@ -57,7 +57,7 @@ type endpoint struct { // The following fields are initialized at creation time and are // immutable. - stack *stack.Stack `state:"manual"` + stack *stack.Stack transProto tcpip.TransportProtocolNumber waiterQueue *waiter.Queue net network.Endpoint diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/endpoint_state.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/endpoint_state.go index 134797e8..ffe9eb35 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/endpoint_state.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/endpoint_state.go @@ -19,6 +19,7 @@ import ( "fmt" "time" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/tcpip/transport" @@ -36,7 +37,11 @@ func (p *icmpPacket) loadReceivedAt(_ context.Context, nsec int64) { // afterLoad is invoked by stateify. func (e *endpoint) afterLoad(ctx context.Context) { - stack.RestoreStackFromContext(ctx).RegisterRestoredEndpoint(e) + if e.stack.IsSaveRestoreEnabled() { + e.stack.RegisterRestoredEndpoint(e) + } else { + stack.RestoreStackFromContext(ctx).RegisterRestoredEndpoint(e) + } } // beforeSave is invoked by stateify. 
@@ -47,13 +52,24 @@ func (e *endpoint) beforeSave() { // Restore implements tcpip.RestoredEndpoint.Restore. func (e *endpoint) Restore(s *stack.Stack) { - e.thaw() + if err := e.net.Resume(s); err != nil { + log.Warningf("Closing the ICMP endpoint as it cannot be restored, err: %v", err) + e.Close() + return + } - e.net.Resume(s) + e.thaw() + if e.stack.IsSaveRestoreEnabled() { + e.ops.InitHandler(e, e.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits) + return + } e.stack = s e.ops.InitHandler(e, e.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits) + e.mu.Lock() + defer e.mu.Unlock() + switch state := e.net.State(); state { case transport.DatagramEndpointStateInitial, transport.DatagramEndpointStateClosed: case transport.DatagramEndpointStateBound, transport.DatagramEndpointStateConnected: diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/icmp_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/icmp_state_autogen.go index ee6a8c2e..af7692bf 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/icmp_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/icmp_state_autogen.go @@ -29,8 +29,8 @@ func (p *icmpPacket) beforeSave() {} // +checklocksignore func (p *icmpPacket) StateSave(stateSinkObject state.Sink) { p.beforeSave() - var receivedAtValue int64 - receivedAtValue = p.saveReceivedAt() + receivedAtValue := p.saveReceivedAt() + _ = (int64)(receivedAtValue) stateSinkObject.SaveValue(4, receivedAtValue) stateSinkObject.Save(0, &p.icmpPacketEntry) stateSinkObject.Save(1, &p.senderAddress) @@ -60,6 +60,7 @@ func (e *endpoint) StateTypeName() string { func (e *endpoint) StateFields() []string { return []string{ "DefaultSocketOptionsHandler", + "stack", "transProto", "waiterQueue", "net", @@ -78,33 +79,35 @@ func (e *endpoint) StateFields() []string { func (e *endpoint) StateSave(stateSinkObject state.Sink) { e.beforeSave() stateSinkObject.Save(0, 
&e.DefaultSocketOptionsHandler) - stateSinkObject.Save(1, &e.transProto) - stateSinkObject.Save(2, &e.waiterQueue) - stateSinkObject.Save(3, &e.net) - stateSinkObject.Save(4, &e.stats) - stateSinkObject.Save(5, &e.ops) - stateSinkObject.Save(6, &e.rcvReady) - stateSinkObject.Save(7, &e.rcvList) - stateSinkObject.Save(8, &e.rcvBufSize) - stateSinkObject.Save(9, &e.rcvClosed) - stateSinkObject.Save(10, &e.frozen) - stateSinkObject.Save(11, &e.ident) + stateSinkObject.Save(1, &e.stack) + stateSinkObject.Save(2, &e.transProto) + stateSinkObject.Save(3, &e.waiterQueue) + stateSinkObject.Save(4, &e.net) + stateSinkObject.Save(5, &e.stats) + stateSinkObject.Save(6, &e.ops) + stateSinkObject.Save(7, &e.rcvReady) + stateSinkObject.Save(8, &e.rcvList) + stateSinkObject.Save(9, &e.rcvBufSize) + stateSinkObject.Save(10, &e.rcvClosed) + stateSinkObject.Save(11, &e.frozen) + stateSinkObject.Save(12, &e.ident) } // +checklocksignore func (e *endpoint) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(0, &e.DefaultSocketOptionsHandler) - stateSourceObject.Load(1, &e.transProto) - stateSourceObject.Load(2, &e.waiterQueue) - stateSourceObject.Load(3, &e.net) - stateSourceObject.Load(4, &e.stats) - stateSourceObject.Load(5, &e.ops) - stateSourceObject.Load(6, &e.rcvReady) - stateSourceObject.Load(7, &e.rcvList) - stateSourceObject.Load(8, &e.rcvBufSize) - stateSourceObject.Load(9, &e.rcvClosed) - stateSourceObject.Load(10, &e.frozen) - stateSourceObject.Load(11, &e.ident) + stateSourceObject.Load(1, &e.stack) + stateSourceObject.Load(2, &e.transProto) + stateSourceObject.Load(3, &e.waiterQueue) + stateSourceObject.Load(4, &e.net) + stateSourceObject.Load(5, &e.stats) + stateSourceObject.Load(6, &e.ops) + stateSourceObject.Load(7, &e.rcvReady) + stateSourceObject.Load(8, &e.rcvList) + stateSourceObject.Load(9, &e.rcvBufSize) + stateSourceObject.Load(10, &e.rcvClosed) + stateSourceObject.Load(11, &e.frozen) + stateSourceObject.Load(12, &e.ident) 
stateSourceObject.AfterLoad(func() { e.afterLoad(ctx) }) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/protocol.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/protocol.go index 8bca0fa5..392aeecb 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/protocol.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/icmp/protocol.go @@ -128,6 +128,9 @@ func (*protocol) Pause() {} // Resume implements stack.TransportProtocol.Resume. func (*protocol) Resume() {} +// Restore implements stack.TransportProtocol.Restore. +func (*protocol) Restore() {} + // Parse implements stack.TransportProtocol.Parse. func (*protocol) Parse(pkt *stack.PacketBuffer) bool { // Right now, the Parse() method is tied to enabled protocols passed into diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/internal/network/endpoint.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/internal/network/endpoint.go index 9b77ae36..7ef8e8f6 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/internal/network/endpoint.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/internal/network/endpoint.go @@ -35,7 +35,7 @@ import ( // +stateify savable type Endpoint struct { // The following fields must only be set once then never changed. - stack *stack.Stack `state:"manual"` + stack *stack.Stack ops *tcpip.SocketOptions netProto tcpip.NetworkProtocolNumber transProto tcpip.TransportProtocolNumber @@ -53,7 +53,7 @@ type Endpoint struct { // +checklocks:mu effectiveNetProto tcpip.NetworkProtocolNumber // +checklocks:mu - connectedRoute *stack.Route `state:"manual"` + connectedRoute *stack.Route `state:"nosave"` // +checklocks:mu multicastMemberships map[multicastMembership]struct{} // +checklocks:mu @@ -66,13 +66,16 @@ type Endpoint struct { // TODO(https://gvisor.dev/issue/6389): Use different fields for IPv4/IPv6. // +checklocks:mu multicastAddr tcpip.Address - // TODO(https://gvisor.dev/issue/6389): Use different fields for IPv4/IPv6. 
// +checklocks:mu multicastNICID tcpip.NICID // +checklocks:mu + ipv6MulticastNICID tcpip.NICID + // +checklocks:mu ipv4TOS uint8 // +checklocks:mu ipv6TClass uint8 + // +checklocks:mu + pmtud tcpip.PMTUDStrategy // Lock ordering: mu > infoMu. infoMu sync.RWMutex `state:"nosave"` @@ -181,7 +184,11 @@ func (e *Endpoint) Close() { } for mem := range e.multicastMemberships { - e.stack.LeaveGroup(e.netProto, mem.nicID, mem.multicastAddr) + proto, err := e.multicastNetProto(mem.multicastAddr) + if err != nil { + panic("non multicast address in an existing membership") + } + e.stack.LeaveGroup(proto, mem.nicID, mem.multicastAddr) } e.multicastMemberships = nil @@ -229,6 +236,7 @@ type WriteContext struct { route *stack.Route ttl uint8 tos uint8 + df bool } func (c *WriteContext) MTU() uint32 { @@ -277,26 +285,23 @@ func (c *WriteContext) TryNewPacketBuffer(reserveHdrBytes int, data buffer.Buffe return c.newPacketBufferLocked(reserveHdrBytes, data) } -// TryNewPacketBufferFromPayloader returns a new packet buffer iff the endpoint's send buffer +// TryNewPacketBufferFromPayloader returns a new packet buffer if the endpoint's send buffer // is not full. Otherwise, data from `payloader` isn't read. -// -// If this method returns nil, the caller should wait for the endpoint to become -// writable. 
-func (c *WriteContext) TryNewPacketBufferFromPayloader(reserveHdrBytes int, payloader tcpip.Payloader) *stack.PacketBuffer { +func (c *WriteContext) TryNewPacketBufferFromPayloader(reserveHdrBytes int, payloader tcpip.Payloader) (*stack.PacketBuffer, tcpip.Error) { e := c.e e.sendBufferSizeInUseMu.Lock() defer e.sendBufferSizeInUseMu.Unlock() if !e.hasSendSpaceRLocked() { - return nil + return nil, &tcpip.ErrWouldBlock{} } var data buffer.Buffer if _, err := data.WriteFromReader(payloader, int64(payloader.Len())); err != nil { data.Release() - return nil + return nil, &tcpip.ErrBadBuffer{} } - return c.newPacketBufferLocked(reserveHdrBytes, data) + return c.newPacketBufferLocked(reserveHdrBytes, data), nil } // +checklocks:c.e.sendBufferSizeInUseMu @@ -310,6 +315,13 @@ func (c *WriteContext) newPacketBufferLocked(reserveHdrBytes int, data buffer.Bu // This matches Linux behaviour: // https://github.com/torvalds/linux/blob/38d741cb70b/include/net/sock.h#L2519 // https://github.com/torvalds/linux/blob/38d741cb70b/net/core/sock.c#L2588 + var expOptVal uint16 + if nic, err := c.e.stack.GetNICByID(c.route.OutgoingNIC()); err == nil && nic.GetExperimentIPOptionEnabled() { + expOptVal = c.e.ops.GetExperimentOptionValue() + } + if c.route.NetProto() == header.IPv6ProtocolNumber && expOptVal != 0 { + reserveHdrBytes += header.IPv6ExperimentHdrLength + } pktSize := int64(reserveHdrBytes) + int64(data.Size()) e.sendBufferSizeInUse += pktSize @@ -344,10 +356,17 @@ func (c *WriteContext) WritePacket(pkt *stack.PacketBuffer, headerIncluded bool) return c.route.WriteHeaderIncludedPacket(pkt) } + var expOptVal uint16 + if nic, err := c.e.stack.GetNICByID(c.route.OutgoingNIC()); err == nil && nic.GetExperimentIPOptionEnabled() { + expOptVal = c.e.ops.GetExperimentOptionValue() + } + err := c.route.WritePacket(stack.NetworkHeaderParams{ - Protocol: c.e.transProto, - TTL: c.ttl, - TOS: c.tos, + Protocol: c.e.transProto, + TTL: c.ttl, + TOS: c.tos, + DF: c.df, + 
ExperimentOptionValue: expOptVal, }, pkt) if _, ok := err.(*tcpip.ErrNoBufferSpace); ok { @@ -420,8 +439,8 @@ func (e *Endpoint) AcquireContextForWrite(opts tcpip.WriteOptions) (WriteContext route := e.connectedRoute to := opts.To info := e.Info() - switch { - case to == nil: + switch to { + case nil: // If the user doesn't specify a destination, they should have // connected to another address. if e.State() != transport.DatagramEndpointStateConnected { @@ -553,11 +572,28 @@ func (e *Endpoint) AcquireContextForWrite(opts tcpip.WriteOptions) (WriteContext panic(fmt.Sprintf("invalid protocol number = %d", netProto)) } + // Set the DF (Don't Fragment) bit based on the PMTUD strategy, + // matching TCP behavior in connect.go. + // Note: In gVisor, WANT and DO are treated identically (both set DF). + // Linux kernel differentiates them (WANT allows local fragmentation, + // DO returns EMSGSIZE), but gVisor's IPv4 layer always allows local + // fragmentation for locally-generated packets regardless of DF + // (see gvisor.dev/issue/5919). + // + // PROBE also sets DF, matching Linux ip_dont_fragment(). In Linux, + // PROBE differs from DO only in that it ignores incoming ICMP + // "Fragmentation Needed" messages (i.e. does not update the cached + // route PMTU). Since gVisor does not implement ICMP-based PMTU + // feedback for transport sockets, PROBE and DO are functionally + // equivalent here. 
+ df := e.pmtud == tcpip.PMTUDiscoveryWant || e.pmtud == tcpip.PMTUDiscoveryDo || e.pmtud == tcpip.PMTUDiscoveryProbe + return WriteContext{ e: e, route: route, ttl: ttl, tos: tos, + df: df, }, nil } @@ -600,7 +636,7 @@ func (e *Endpoint) connectRouteRLocked(nicID tcpip.NICID, localAddr tcpip.Addres localAddr = tcpip.Address{} } - if header.IsV4MulticastAddress(addr.Addr) || header.IsV6MulticastAddress(addr.Addr) { + if header.IsV4MulticastAddress(addr.Addr) { if nicID == 0 { nicID = e.multicastNICID } @@ -608,6 +644,9 @@ func (e *Endpoint) connectRouteRLocked(nicID tcpip.NICID, localAddr tcpip.Addres localAddr = e.multicastAddr } } + if header.IsV6MulticastAddress(addr.Addr) && nicID == 0 { + nicID = e.ipv6MulticastNICID + } } // Find a route to the desired destination. @@ -822,9 +861,18 @@ func (e *Endpoint) GetRemoteAddress() (tcpip.FullAddress, bool) { func (e *Endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error { switch opt { case tcpip.MTUDiscoverOption: - // Return not supported if the value is not disabling path - // MTU discovery. - if tcpip.PMTUDStrategy(v) != tcpip.PMTUDiscoveryDont { + // Store PMTU discovery settings. The DF bit on outgoing + // packets is set accordingly in AcquireContextForWrite. + // PROBE is accepted alongside DO/WANT/DONT. In Linux, + // PROBE sets DF but ignores ICMP-based PMTU updates; + // since gVisor lacks ICMP PMTU feedback, it behaves + // identically to DO. 
+ switch tcpip.PMTUDStrategy(v) { + case tcpip.PMTUDiscoveryWant, tcpip.PMTUDiscoveryDont, tcpip.PMTUDiscoveryDo, tcpip.PMTUDiscoveryProbe: + e.mu.Lock() + e.pmtud = tcpip.PMTUDStrategy(v) + e.mu.Unlock() + default: return &tcpip.ErrNotSupported{} } @@ -852,6 +900,18 @@ func (e *Endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error { e.mu.Lock() e.ipv6TClass = uint8(v) e.mu.Unlock() + + case tcpip.IPv6MulticastInterfaceOption: + if v != 0 && !e.stack.CheckNIC(tcpip.NICID(v)) { + return &tcpip.ErrUnknownNICID{} + } + e.mu.Lock() + defer e.mu.Unlock() + nic := tcpip.NICID(v) + if info := e.Info(); info.BindNICID != 0 && info.BindNICID != nic { + return &tcpip.ErrInvalidEndpointState{} + } + e.ipv6MulticastNICID = nic } return nil @@ -861,8 +921,10 @@ func (e *Endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error { func (e *Endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) { switch opt { case tcpip.MTUDiscoverOption: - // The only supported setting is path MTU discovery disabled. - return int(tcpip.PMTUDiscoveryDont), nil + e.mu.Lock() + v := int(e.pmtud) + e.mu.Unlock() + return v, nil case tcpip.MulticastTTLOption: e.mu.Lock() @@ -894,11 +956,30 @@ func (e *Endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) { e.mu.RUnlock() return v, nil + case tcpip.IPv6MulticastInterfaceOption: + e.mu.RLock() + v := int(e.ipv6MulticastNICID) + e.mu.RUnlock() + return v, nil + default: return -1, &tcpip.ErrUnknownProtocolOption{} } } +// multicastNetProto returns the network protocol of a given multicast address. +// Returns an error if the address is not a multicast address. 
+func (e *Endpoint) multicastNetProto(addr tcpip.Address) (tcpip.NetworkProtocolNumber, tcpip.Error) { + switch { + case header.IsV4MulticastAddress(addr): + return header.IPv4ProtocolNumber, nil + case header.IsV6MulticastAddress(addr): + return header.IPv6ProtocolNumber, nil + default: + return 0, &tcpip.ErrInvalidOptionValue{} + } +} + // SetSockOpt sets the socket option. func (e *Endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { switch v := opt.(type) { @@ -939,21 +1020,23 @@ func (e *Endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { e.multicastAddr = addr case *tcpip.AddMembershipOption: - if !(header.IsV4MulticastAddress(v.MulticastAddr) && e.netProto == header.IPv4ProtocolNumber) && !(header.IsV6MulticastAddress(v.MulticastAddr) && e.netProto == header.IPv6ProtocolNumber) { - return &tcpip.ErrInvalidOptionValue{} + // Allowing IP_ADD_MEMBERSHIP on an ipv6 socket matches Linux behavior: + // https://github.com/torvalds/linux/blob/cec1e6e5d1a/net/ipv6/ipv6_sockglue.c#L964 + proto, err := e.multicastNetProto(v.MulticastAddr) + if err != nil { + return err } nicID := v.NIC - if v.InterfaceAddr.Unspecified() { if nicID == 0 { - if r, err := e.stack.FindRoute(0, tcpip.Address{}, v.MulticastAddr, e.netProto, false /* multicastLoop */); err == nil { + if r, err := e.stack.FindRoute(0, tcpip.Address{}, v.MulticastAddr, proto, false /* multicastLoop */); err == nil { nicID = r.NICID() r.Release() } } } else { - nicID = e.stack.CheckLocalAddress(nicID, e.netProto, v.InterfaceAddr) + nicID = e.stack.CheckLocalAddress(nicID, proto, v.InterfaceAddr) } if nicID == 0 { return &tcpip.ErrUnknownDevice{} @@ -968,27 +1051,28 @@ func (e *Endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { return &tcpip.ErrPortInUse{} } - if err := e.stack.JoinGroup(e.netProto, nicID, v.MulticastAddr); err != nil { + if err := e.stack.JoinGroup(proto, nicID, v.MulticastAddr); err != nil { return err } e.multicastMemberships[memToInsert] = 
struct{}{} case *tcpip.RemoveMembershipOption: - if !(header.IsV4MulticastAddress(v.MulticastAddr) && e.netProto == header.IPv4ProtocolNumber) && !(header.IsV6MulticastAddress(v.MulticastAddr) && e.netProto == header.IPv6ProtocolNumber) { - return &tcpip.ErrInvalidOptionValue{} + proto, err := e.multicastNetProto(v.MulticastAddr) + if err != nil { + return err } nicID := v.NIC if v.InterfaceAddr.Unspecified() { if nicID == 0 { - if r, err := e.stack.FindRoute(0, tcpip.Address{}, v.MulticastAddr, e.netProto, false /* multicastLoop */); err == nil { + if r, err := e.stack.FindRoute(0, tcpip.Address{}, v.MulticastAddr, proto, false /* multicastLoop */); err == nil { nicID = r.NICID() r.Release() } } } else { - nicID = e.stack.CheckLocalAddress(nicID, e.netProto, v.InterfaceAddr) + nicID = e.stack.CheckLocalAddress(nicID, proto, v.InterfaceAddr) } if nicID == 0 { return &tcpip.ErrUnknownDevice{} @@ -1003,7 +1087,7 @@ func (e *Endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { return &tcpip.ErrBadLocalAddress{} } - if err := e.stack.LeaveGroup(e.netProto, nicID, v.MulticastAddr); err != nil { + if err := e.stack.LeaveGroup(proto, nicID, v.MulticastAddr); err != nil { return err } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/internal/network/endpoint_state.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/internal/network/endpoint_state.go index d4950296..c8ca6839 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/internal/network/endpoint_state.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/internal/network/endpoint_state.go @@ -23,15 +23,18 @@ import ( ) // Resume implements tcpip.ResumableEndpoint.Resume. 
-func (e *Endpoint) Resume(s *stack.Stack) { +func (e *Endpoint) Resume(s *stack.Stack) error { e.mu.Lock() defer e.mu.Unlock() e.stack = s - for m := range e.multicastMemberships { - if err := e.stack.JoinGroup(e.netProto, m.nicID, m.multicastAddr); err != nil { - panic(fmt.Sprintf("e.stack.JoinGroup(%d, %d, %s): %s", e.netProto, m.nicID, m.multicastAddr, err)) + proto, err := e.multicastNetProto(m.multicastAddr) + if err != nil { + return fmt.Errorf("non multicast address in an existing membership during Resume: %s", err) + } + if err := e.stack.JoinGroup(proto, m.nicID, m.multicastAddr); err != nil { + return fmt.Errorf("e.stack.JoinGroup(%d, %d, %s): %s", proto, m.nicID, m.multicastAddr, err) } } @@ -42,17 +45,22 @@ func (e *Endpoint) Resume(s *stack.Stack) { case transport.DatagramEndpointStateBound: if info.ID.LocalAddress.BitLen() != 0 && !e.isBroadcastOrMulticast(info.RegisterNICID, e.effectiveNetProto, info.ID.LocalAddress) { if e.stack.CheckLocalAddress(info.RegisterNICID, e.effectiveNetProto, info.ID.LocalAddress) == 0 { - panic(fmt.Sprintf("got e.stack.CheckLocalAddress(%d, %d, %s) = 0, want != 0", info.RegisterNICID, e.effectiveNetProto, info.ID.LocalAddress)) + return fmt.Errorf("got e.stack.CheckLocalAddress(%d, %d, %s) = 0, want != 0", info.RegisterNICID, e.effectiveNetProto, info.ID.LocalAddress) } } case transport.DatagramEndpointStateConnected: var err tcpip.Error multicastLoop := e.ops.GetMulticastLoop() + // Release the connectedRoute if present. 
+ if e.connectedRoute != nil { + e.connectedRoute.Release() + } e.connectedRoute, err = e.stack.FindRoute(info.RegisterNICID, info.ID.LocalAddress, info.ID.RemoteAddress, e.effectiveNetProto, multicastLoop) if err != nil { - panic(fmt.Sprintf("e.stack.FindRoute(%d, %s, %s, %d, %t): %s", info.RegisterNICID, info.ID.LocalAddress, info.ID.RemoteAddress, e.effectiveNetProto, multicastLoop, err)) + return fmt.Errorf("e.stack.FindRoute(%d, %s, %s, %d, %t): %s", info.RegisterNICID, info.ID.LocalAddress, info.ID.RemoteAddress, e.effectiveNetProto, multicastLoop, err) } default: panic(fmt.Sprintf("unhandled state = %s", state)) } + return nil } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/internal/network/network_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/internal/network/network_state_autogen.go index f3e38fc8..9c765430 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/internal/network/network_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/internal/network/network_state_autogen.go @@ -14,6 +14,7 @@ func (e *Endpoint) StateTypeName() string { func (e *Endpoint) StateFields() []string { return []string{ + "stack", "ops", "netProto", "transProto", @@ -28,8 +29,10 @@ func (e *Endpoint) StateFields() []string { "multicastTTL", "multicastAddr", "multicastNICID", + "ipv6MulticastNICID", "ipv4TOS", "ipv6TClass", + "pmtud", "info", "state", } @@ -40,48 +43,54 @@ func (e *Endpoint) beforeSave() {} // +checklocksignore func (e *Endpoint) StateSave(stateSinkObject state.Sink) { e.beforeSave() - stateSinkObject.Save(0, &e.ops) - stateSinkObject.Save(1, &e.netProto) - stateSinkObject.Save(2, &e.transProto) - stateSinkObject.Save(3, &e.waiterQueue) - stateSinkObject.Save(4, &e.wasBound) - stateSinkObject.Save(5, &e.owner) - stateSinkObject.Save(6, &e.writeShutdown) - stateSinkObject.Save(7, &e.effectiveNetProto) - stateSinkObject.Save(8, &e.multicastMemberships) - stateSinkObject.Save(9, &e.ipv4TTL) - stateSinkObject.Save(10, 
&e.ipv6HopLimit) - stateSinkObject.Save(11, &e.multicastTTL) - stateSinkObject.Save(12, &e.multicastAddr) - stateSinkObject.Save(13, &e.multicastNICID) - stateSinkObject.Save(14, &e.ipv4TOS) - stateSinkObject.Save(15, &e.ipv6TClass) - stateSinkObject.Save(16, &e.info) - stateSinkObject.Save(17, &e.state) + stateSinkObject.Save(0, &e.stack) + stateSinkObject.Save(1, &e.ops) + stateSinkObject.Save(2, &e.netProto) + stateSinkObject.Save(3, &e.transProto) + stateSinkObject.Save(4, &e.waiterQueue) + stateSinkObject.Save(5, &e.wasBound) + stateSinkObject.Save(6, &e.owner) + stateSinkObject.Save(7, &e.writeShutdown) + stateSinkObject.Save(8, &e.effectiveNetProto) + stateSinkObject.Save(9, &e.multicastMemberships) + stateSinkObject.Save(10, &e.ipv4TTL) + stateSinkObject.Save(11, &e.ipv6HopLimit) + stateSinkObject.Save(12, &e.multicastTTL) + stateSinkObject.Save(13, &e.multicastAddr) + stateSinkObject.Save(14, &e.multicastNICID) + stateSinkObject.Save(15, &e.ipv6MulticastNICID) + stateSinkObject.Save(16, &e.ipv4TOS) + stateSinkObject.Save(17, &e.ipv6TClass) + stateSinkObject.Save(18, &e.pmtud) + stateSinkObject.Save(19, &e.info) + stateSinkObject.Save(20, &e.state) } func (e *Endpoint) afterLoad(context.Context) {} // +checklocksignore func (e *Endpoint) StateLoad(ctx context.Context, stateSourceObject state.Source) { - stateSourceObject.Load(0, &e.ops) - stateSourceObject.Load(1, &e.netProto) - stateSourceObject.Load(2, &e.transProto) - stateSourceObject.Load(3, &e.waiterQueue) - stateSourceObject.Load(4, &e.wasBound) - stateSourceObject.Load(5, &e.owner) - stateSourceObject.Load(6, &e.writeShutdown) - stateSourceObject.Load(7, &e.effectiveNetProto) - stateSourceObject.Load(8, &e.multicastMemberships) - stateSourceObject.Load(9, &e.ipv4TTL) - stateSourceObject.Load(10, &e.ipv6HopLimit) - stateSourceObject.Load(11, &e.multicastTTL) - stateSourceObject.Load(12, &e.multicastAddr) - stateSourceObject.Load(13, &e.multicastNICID) - stateSourceObject.Load(14, &e.ipv4TOS) - 
stateSourceObject.Load(15, &e.ipv6TClass) - stateSourceObject.Load(16, &e.info) - stateSourceObject.Load(17, &e.state) + stateSourceObject.Load(0, &e.stack) + stateSourceObject.Load(1, &e.ops) + stateSourceObject.Load(2, &e.netProto) + stateSourceObject.Load(3, &e.transProto) + stateSourceObject.Load(4, &e.waiterQueue) + stateSourceObject.Load(5, &e.wasBound) + stateSourceObject.Load(6, &e.owner) + stateSourceObject.Load(7, &e.writeShutdown) + stateSourceObject.Load(8, &e.effectiveNetProto) + stateSourceObject.Load(9, &e.multicastMemberships) + stateSourceObject.Load(10, &e.ipv4TTL) + stateSourceObject.Load(11, &e.ipv6HopLimit) + stateSourceObject.Load(12, &e.multicastTTL) + stateSourceObject.Load(13, &e.multicastAddr) + stateSourceObject.Load(14, &e.multicastNICID) + stateSourceObject.Load(15, &e.ipv6MulticastNICID) + stateSourceObject.Load(16, &e.ipv4TOS) + stateSourceObject.Load(17, &e.ipv6TClass) + stateSourceObject.Load(18, &e.pmtud) + stateSourceObject.Load(19, &e.info) + stateSourceObject.Load(20, &e.state) } func (m *multicastMembership) StateTypeName() string { diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint.go index 9166bca6..acacb17e 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint.go @@ -26,16 +26,25 @@ package packet import ( "io" + "math" "time" "gvisor.dev/gvisor/pkg/buffer" - "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/waiter" ) +type tpacketVersion int + +const ( + tpacketVersion1 tpacketVersion = iota + tpacketVersion2 +) + +var _ stack.MappablePacketEndpoint = (*endpoint)(nil) + // +stateify savable type packet struct { packetEntry @@ -54,8 +63,9 @@ type packet struct { // // Lock order: // -// endpoint.mu -// endpoint.rcvMu +// endpoint.mu +// 
endpoint.rcvMu +// endpoint.packetMmapMu // // +stateify savable type endpoint struct { @@ -63,14 +73,14 @@ type endpoint struct { // The following fields are initialized at creation time and are // immutable. - stack *stack.Stack `state:"manual"` + stack *stack.Stack waiterQueue *waiter.Queue cooked bool ops tcpip.SocketOptions stats tcpip.TransportEndpointStats // The following fields are used to manage the receive queue. - rcvMu sync.Mutex `state:"nosave"` + rcvMu rcvMutex `state:"nosave"` // +checklocks:rcvMu rcvList packetList // +checklocks:rcvMu @@ -80,7 +90,7 @@ type endpoint struct { // +checklocks:rcvMu rcvDisabled bool - mu sync.RWMutex `state:"nosave"` + mu endpointRWMutex `state:"nosave"` // +checklocks:mu closed bool // +checklocks:mu @@ -88,9 +98,17 @@ type endpoint struct { // +checklocks:mu boundNIC tcpip.NICID - lastErrorMu sync.Mutex `state:"nosave"` + lastErrorMu lastErrorMutex `state:"nosave"` // +checklocks:lastErrorMu lastError tcpip.Error + + packetMmapMu packetMmapRWMutex `state:"nosave"` + // +checklocks:packetMmapMu + packetMMapVersion tpacketVersion + // +checklocks:packetMmapMu + packetMMapReserve int + // +checklocks:packetMmapMu + packetMMapEp stack.PacketMMapEndpoint } // NewEndpoint returns a new packet endpoint. @@ -129,13 +147,18 @@ func (ep *endpoint) Abort() { func (ep *endpoint) Close() { ep.mu.Lock() defer ep.mu.Unlock() - if ep.closed { return } - ep.stack.UnregisterPacketEndpoint(ep.boundNIC, ep.boundNetProto, ep) + ep.packetMmapMu.Lock() + if ep.packetMMapEp != nil { + ep.packetMMapEp.Close() + ep.packetMMapEp = nil + } + ep.packetMmapMu.Unlock() + ep.rcvMu.Lock() defer ep.rcvMu.Unlock() @@ -348,6 +371,11 @@ func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { // Determine whether the endpoint is readable. 
if (mask & waiter.ReadableEvents) != 0 { + ep.packetMmapMu.RLock() + if ep.packetMMapEp != nil { + result |= ep.packetMMapEp.Readiness(mask) + } + ep.packetMmapMu.RUnlock() ep.rcvMu.Lock() if !ep.rcvList.Empty() || ep.rcvClosed { result |= waiter.ReadableEvents @@ -358,13 +386,18 @@ func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { return result } -// SetSockOpt implements tcpip.Endpoint.SetSockOpt. Packet sockets cannot be -// used with SetSockOpt, and this function always returns -// *tcpip.ErrNotSupported. +// SetSockOpt implements tcpip.Endpoint.SetSockOpt. func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { switch opt.(type) { case *tcpip.SocketDetachFilterOption: return nil + case *tcpip.TpacketReq: + ep.rcvMu.Lock() + defer ep.rcvMu.Unlock() + if !ep.rcvList.Empty() { + return &tcpip.ErrWouldBlock{} + } + return nil default: return &tcpip.ErrUnknownProtocolOption{} @@ -372,8 +405,37 @@ func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { } // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. -func (*endpoint) SetSockOptInt(tcpip.SockOptInt, int) tcpip.Error { - return &tcpip.ErrUnknownProtocolOption{} +func (ep *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error { + switch opt { + case tcpip.PacketMMapVersionOption: + ep.packetMmapMu.Lock() + defer ep.packetMmapMu.Unlock() + // We support up to TPACKET_V2. 
+ version := tpacketVersion(v) + switch version { + case tpacketVersion1, tpacketVersion2: + if ep.packetMMapEp != nil { + return &tcpip.ErrEndpointBusy{} + } + ep.packetMMapVersion = version + return nil + default: + return &tcpip.ErrInvalidOptionValue{} + } + case tcpip.PacketMMapReserveOption: + ep.packetMmapMu.Lock() + defer ep.packetMmapMu.Unlock() + if ep.packetMMapEp != nil { + return &tcpip.ErrEndpointBusy{} + } + if uint32(v) > uint32(math.MaxInt32) { + return &tcpip.ErrInvalidOptionValue{} + } + ep.packetMMapReserve = v + return nil + default: + return &tcpip.ErrUnknownProtocolOption{} + } } func (ep *endpoint) LastError() tcpip.Error { @@ -393,8 +455,19 @@ func (ep *endpoint) UpdateLastError(err tcpip.Error) { } // GetSockOpt implements tcpip.Endpoint.GetSockOpt. -func (*endpoint) GetSockOpt(tcpip.GettableSocketOption) tcpip.Error { - return &tcpip.ErrNotSupported{} +func (ep *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error { + switch opt := opt.(type) { + case *tcpip.TpacketStats: + ep.packetMmapMu.RLock() + defer ep.packetMmapMu.RUnlock() + if ep.packetMMapEp == nil { + return nil + } + *opt = ep.packetMMapEp.Stats() + return nil + default: + return &tcpip.ErrUnknownProtocolOption{} + } } // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. @@ -415,8 +488,31 @@ func (ep *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) { } } -// HandlePacket implements stack.PacketEndpoint.HandlePacket. +// handlePacket implements stack.PacketEndpoint.HandlePacket func (ep *endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + ep.packetMmapMu.RLock() + if ep.packetMMapEp != nil { + if handled := ep.packetMMapEp.HandlePacket(nicID, netProto, pkt); handled { + ep.packetMmapMu.RUnlock() + return + } + } + ep.packetMmapMu.RUnlock() + + wasEmpty := ep.handlePacketInner(nicID, netProto, pkt) + + ep.stats.PacketsReceived.Increment() + // Notify waiters that there's data to be read. 
+ if wasEmpty { + ep.waiterQueue.Notify(waiter.ReadableEvents) + } +} + +func (ep *endpoint) HandlePacketMMapCopy(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + _ = ep.handlePacketInner(nicID, netProto, pkt) +} + +func (ep *endpoint) handlePacketInner(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) bool { ep.rcvMu.Lock() // Drop the packet if our buffer is currently full. @@ -424,7 +520,7 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtoc ep.rcvMu.Unlock() ep.stack.Stats().DroppedPackets.Increment() ep.stats.ReceiveErrors.ClosedReceiver.Increment() - return + return false } rcvBufSize := ep.ops.GetReceiveBufferSize() @@ -432,7 +528,7 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtoc ep.rcvMu.Unlock() ep.stack.Stats().DroppedPackets.Increment() ep.stats.ReceiveErrors.ReceiveBufferOverflow.Increment() - return + return false } wasEmpty := ep.rcvBufSize == 0 @@ -464,13 +560,8 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtoc ep.rcvList.PushBack(&rcvdPkt) ep.rcvBufSize += rcvdPkt.data.Size() - ep.rcvMu.Unlock() - ep.stats.PacketsReceived.Increment() - // Notify waiters that there's data to be read. - if wasEmpty { - ep.waiterQueue.Notify(waiter.ReadableEvents) - } + return wasEmpty } // State implements socket.Socket.State. @@ -497,3 +588,36 @@ func (*endpoint) SetOwner(tcpip.PacketOwner) {} func (ep *endpoint) SocketOptions() *tcpip.SocketOptions { return &ep.ops } + +// GetPacketMMapOpts implements stack.MappablePacketEndpoint.GetPacketMMapOpts. 
+func (ep *endpoint) GetPacketMMapOpts(req *tcpip.TpacketReq, isRx bool) stack.PacketMMapOpts { + ep.packetMmapMu.Lock() + defer ep.packetMmapMu.Unlock() + + return stack.PacketMMapOpts{ + Req: req, + IsRx: isRx, + Cooked: ep.cooked, + Stack: ep.stack, + Wq: ep.waiterQueue, + PacketEndpoint: ep, + Version: int(ep.packetMMapVersion), + Reserve: uint32(ep.packetMMapReserve), + } +} + +// SetPacketMMapEndpoint implements +// stack.MappablePacketEndpoint.SetPacketMMapEndpoint. +func (ep *endpoint) SetPacketMMapEndpoint(m stack.PacketMMapEndpoint) { + ep.packetMmapMu.Lock() + defer ep.packetMmapMu.Unlock() + ep.packetMMapEp = m +} + +// GetPacketMMapEndpoint implements +// stack.MappablePacketEndpoint.GetPacketMMapEndpoint. +func (ep *endpoint) GetPacketMMapEndpoint() stack.PacketMMapEndpoint { + ep.packetMmapMu.RLock() + defer ep.packetMmapMu.RUnlock() + return ep.packetMMapEp +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint_mutex.go new file mode 100644 index 00000000..1fb86b87 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint_mutex.go @@ -0,0 +1,96 @@ +package packet + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// RWMutex is sync.RWMutex with the correctness validator. +type endpointRWMutex struct { + mu sync.RWMutex +} + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var endpointlockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type endpointlockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. 
+// +checklocksignore +func (m *endpointRWMutex) Lock() { + locking.AddGLock(endpointprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *endpointRWMutex) NestedLock(i endpointlockNameIndex) { + locking.AddGLock(endpointprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *endpointRWMutex) Unlock() { + m.mu.Unlock() + locking.DelGLock(endpointprefixIndex, -1) +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *endpointRWMutex) NestedUnlock(i endpointlockNameIndex) { + m.mu.Unlock() + locking.DelGLock(endpointprefixIndex, int(i)) +} + +// RLock locks m for reading. +// +checklocksignore +func (m *endpointRWMutex) RLock() { + locking.AddGLock(endpointprefixIndex, -1) + m.mu.RLock() +} + +// RUnlock undoes a single RLock call. +// +checklocksignore +func (m *endpointRWMutex) RUnlock() { + m.mu.RUnlock() + locking.DelGLock(endpointprefixIndex, -1) +} + +// RLockBypass locks m for reading without executing the validator. +// +checklocksignore +func (m *endpointRWMutex) RLockBypass() { + m.mu.RLock() +} + +// RUnlockBypass undoes a single RLockBypass call. +// +checklocksignore +func (m *endpointRWMutex) RUnlockBypass() { + m.mu.RUnlock() +} + +// DowngradeLock atomically unlocks rw for writing and locks it for reading. +// +checklocksignore +func (m *endpointRWMutex) DowngradeLock() { + m.mu.DowngradeLock() +} + +var endpointprefixIndex *locking.MutexClass + +// DO NOT REMOVE: The following function is automatically replaced. 
+func endpointinitLockNames() {} + +func init() { + endpointinitLockNames() + endpointprefixIndex = locking.NewMutexClass(reflect.TypeFor[endpointRWMutex](), endpointlockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint_rcv_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint_rcv_mutex.go new file mode 100644 index 00000000..2ce31390 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint_rcv_mutex.go @@ -0,0 +1,64 @@ +package packet + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type rcvMutex struct { + mu sync.Mutex +} + +var rcvprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var rcvlockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type rcvlockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. +// +checklocksignore +func (m *rcvMutex) Lock() { + locking.AddGLock(rcvprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *rcvMutex) NestedLock(i rcvlockNameIndex) { + locking.AddGLock(rcvprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *rcvMutex) Unlock() { + locking.DelGLock(rcvprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *rcvMutex) NestedUnlock(i rcvlockNameIndex) { + locking.DelGLock(rcvprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. 
+func rcvinitLockNames() {} + +func init() { + rcvinitLockNames() + rcvprefixIndex = locking.NewMutexClass(reflect.TypeFor[rcvMutex](), rcvlockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint_state.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint_state.go index 16be7d6b..607a1c2e 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint_state.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/endpoint_state.go @@ -16,9 +16,9 @@ package packet import ( "context" - "fmt" "time" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -36,21 +36,30 @@ func (p *packet) loadReceivedAt(_ context.Context, nsec int64) { // beforeSave is invoked by stateify. func (ep *endpoint) beforeSave() { ep.rcvMu.Lock() - defer ep.rcvMu.Unlock() ep.rcvDisabled = true + ep.rcvMu.Unlock() ep.stack.RegisterResumableEndpoint(ep) } // afterLoad is invoked by stateify. func (ep *endpoint) afterLoad(ctx context.Context) { + if !ep.stack.IsSaveRestoreEnabled() { + ep.mu.Lock() + ep.stack = stack.RestoreStackFromContext(ctx) + ep.mu.Unlock() + } + ep.stack.RegisterRestoredEndpoint(ep) +} + +// Restore implements tcpip.RestoredEndpoint.Restore. 
+func (ep *endpoint) Restore(_ *stack.Stack) { ep.mu.Lock() defer ep.mu.Unlock() - ep.stack = stack.RestoreStackFromContext(ctx) ep.ops.InitHandler(ep, ep.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits) - if err := ep.stack.RegisterPacketEndpoint(ep.boundNIC, ep.boundNetProto, ep); err != nil { - panic(fmt.Sprintf("RegisterPacketEndpoint(%d, %d, _): %s", ep.boundNIC, ep.boundNetProto, err)) + log.Warningf("RegisterPacketEndpoint(%d, %d, _) failed during restore with error: %s", ep.boundNIC, ep.boundNetProto, err) + return } ep.rcvMu.Lock() diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/last_error_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/last_error_mutex.go new file mode 100644 index 00000000..9dec3855 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/last_error_mutex.go @@ -0,0 +1,64 @@ +package packet + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type lastErrorMutex struct { + mu sync.Mutex +} + +var lastErrorprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var lastErrorlockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type lastErrorlockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. +// +checklocksignore +func (m *lastErrorMutex) Lock() { + locking.AddGLock(lastErrorprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. 
+// +checklocksignore +func (m *lastErrorMutex) NestedLock(i lastErrorlockNameIndex) { + locking.AddGLock(lastErrorprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *lastErrorMutex) Unlock() { + locking.DelGLock(lastErrorprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *lastErrorMutex) NestedUnlock(i lastErrorlockNameIndex) { + locking.DelGLock(lastErrorprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. +func lastErrorinitLockNames() {} + +func init() { + lastErrorinitLockNames() + lastErrorprefixIndex = locking.NewMutexClass(reflect.TypeFor[lastErrorMutex](), lastErrorlockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/packet_mmap_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/packet_mmap_mutex.go new file mode 100644 index 00000000..3cbebd58 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/packet_mmap_mutex.go @@ -0,0 +1,96 @@ +package packet + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// RWMutex is sync.RWMutex with the correctness validator. +type packetMmapRWMutex struct { + mu sync.RWMutex +} + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var packetMmaplockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type packetMmaplockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. 
+// +checklocksignore +func (m *packetMmapRWMutex) Lock() { + locking.AddGLock(packetMmapprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *packetMmapRWMutex) NestedLock(i packetMmaplockNameIndex) { + locking.AddGLock(packetMmapprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *packetMmapRWMutex) Unlock() { + m.mu.Unlock() + locking.DelGLock(packetMmapprefixIndex, -1) +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *packetMmapRWMutex) NestedUnlock(i packetMmaplockNameIndex) { + m.mu.Unlock() + locking.DelGLock(packetMmapprefixIndex, int(i)) +} + +// RLock locks m for reading. +// +checklocksignore +func (m *packetMmapRWMutex) RLock() { + locking.AddGLock(packetMmapprefixIndex, -1) + m.mu.RLock() +} + +// RUnlock undoes a single RLock call. +// +checklocksignore +func (m *packetMmapRWMutex) RUnlock() { + m.mu.RUnlock() + locking.DelGLock(packetMmapprefixIndex, -1) +} + +// RLockBypass locks m for reading without executing the validator. +// +checklocksignore +func (m *packetMmapRWMutex) RLockBypass() { + m.mu.RLock() +} + +// RUnlockBypass undoes a single RLockBypass call. +// +checklocksignore +func (m *packetMmapRWMutex) RUnlockBypass() { + m.mu.RUnlock() +} + +// DowngradeLock atomically unlocks rw for writing and locks it for reading. +// +checklocksignore +func (m *packetMmapRWMutex) DowngradeLock() { + m.mu.DowngradeLock() +} + +var packetMmapprefixIndex *locking.MutexClass + +// DO NOT REMOVE: The following function is automatically replaced. 
+func packetMmapinitLockNames() {} + +func init() { + packetMmapinitLockNames() + packetMmapprefixIndex = locking.NewMutexClass(reflect.TypeFor[packetMmapRWMutex](), packetMmaplockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/packet_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/packet_state_autogen.go index 7e2f7fda..3a913d3f 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/packet_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/packet/packet_state_autogen.go @@ -27,8 +27,8 @@ func (p *packet) beforeSave() {} // +checklocksignore func (p *packet) StateSave(stateSinkObject state.Sink) { p.beforeSave() - var receivedAtValue int64 - receivedAtValue = p.saveReceivedAt() + receivedAtValue := p.saveReceivedAt() + _ = (int64)(receivedAtValue) stateSinkObject.SaveValue(2, receivedAtValue) stateSinkObject.Save(0, &p.packetEntry) stateSinkObject.Save(1, &p.data) @@ -54,6 +54,7 @@ func (ep *endpoint) StateTypeName() string { func (ep *endpoint) StateFields() []string { return []string{ "DefaultSocketOptionsHandler", + "stack", "waiterQueue", "cooked", "ops", @@ -66,6 +67,9 @@ func (ep *endpoint) StateFields() []string { "boundNetProto", "boundNIC", "lastError", + "packetMMapVersion", + "packetMMapReserve", + "packetMMapEp", } } @@ -73,35 +77,43 @@ func (ep *endpoint) StateFields() []string { func (ep *endpoint) StateSave(stateSinkObject state.Sink) { ep.beforeSave() stateSinkObject.Save(0, &ep.DefaultSocketOptionsHandler) - stateSinkObject.Save(1, &ep.waiterQueue) - stateSinkObject.Save(2, &ep.cooked) - stateSinkObject.Save(3, &ep.ops) - stateSinkObject.Save(4, &ep.stats) - stateSinkObject.Save(5, &ep.rcvList) - stateSinkObject.Save(6, &ep.rcvBufSize) - stateSinkObject.Save(7, &ep.rcvClosed) - stateSinkObject.Save(8, &ep.rcvDisabled) - stateSinkObject.Save(9, &ep.closed) - stateSinkObject.Save(10, &ep.boundNetProto) - stateSinkObject.Save(11, &ep.boundNIC) - stateSinkObject.Save(12, 
&ep.lastError) + stateSinkObject.Save(1, &ep.stack) + stateSinkObject.Save(2, &ep.waiterQueue) + stateSinkObject.Save(3, &ep.cooked) + stateSinkObject.Save(4, &ep.ops) + stateSinkObject.Save(5, &ep.stats) + stateSinkObject.Save(6, &ep.rcvList) + stateSinkObject.Save(7, &ep.rcvBufSize) + stateSinkObject.Save(8, &ep.rcvClosed) + stateSinkObject.Save(9, &ep.rcvDisabled) + stateSinkObject.Save(10, &ep.closed) + stateSinkObject.Save(11, &ep.boundNetProto) + stateSinkObject.Save(12, &ep.boundNIC) + stateSinkObject.Save(13, &ep.lastError) + stateSinkObject.Save(14, &ep.packetMMapVersion) + stateSinkObject.Save(15, &ep.packetMMapReserve) + stateSinkObject.Save(16, &ep.packetMMapEp) } // +checklocksignore func (ep *endpoint) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(0, &ep.DefaultSocketOptionsHandler) - stateSourceObject.Load(1, &ep.waiterQueue) - stateSourceObject.Load(2, &ep.cooked) - stateSourceObject.Load(3, &ep.ops) - stateSourceObject.Load(4, &ep.stats) - stateSourceObject.Load(5, &ep.rcvList) - stateSourceObject.Load(6, &ep.rcvBufSize) - stateSourceObject.Load(7, &ep.rcvClosed) - stateSourceObject.Load(8, &ep.rcvDisabled) - stateSourceObject.Load(9, &ep.closed) - stateSourceObject.Load(10, &ep.boundNetProto) - stateSourceObject.Load(11, &ep.boundNIC) - stateSourceObject.Load(12, &ep.lastError) + stateSourceObject.Load(1, &ep.stack) + stateSourceObject.Load(2, &ep.waiterQueue) + stateSourceObject.Load(3, &ep.cooked) + stateSourceObject.Load(4, &ep.ops) + stateSourceObject.Load(5, &ep.stats) + stateSourceObject.Load(6, &ep.rcvList) + stateSourceObject.Load(7, &ep.rcvBufSize) + stateSourceObject.Load(8, &ep.rcvClosed) + stateSourceObject.Load(9, &ep.rcvDisabled) + stateSourceObject.Load(10, &ep.closed) + stateSourceObject.Load(11, &ep.boundNetProto) + stateSourceObject.Load(12, &ep.boundNIC) + stateSourceObject.Load(13, &ep.lastError) + stateSourceObject.Load(14, &ep.packetMMapVersion) + stateSourceObject.Load(15, 
&ep.packetMMapReserve) + stateSourceObject.Load(16, &ep.packetMMapEp) stateSourceObject.AfterLoad(func() { ep.afterLoad(ctx) }) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/raw/endpoint.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/raw/endpoint.go index 1eaedc19..5203c4dd 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/raw/endpoint.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/raw/endpoint.go @@ -73,7 +73,7 @@ type endpoint struct { // The following fields are initialized at creation time and are // immutable. - stack *stack.Stack `state:"manual"` + stack *stack.Stack transProto tcpip.TransportProtocolNumber waiterQueue *waiter.Queue associated bool diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/raw/endpoint_state.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/raw/endpoint_state.go index d915ade2..012611bc 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/raw/endpoint_state.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/raw/endpoint_state.go @@ -16,9 +16,9 @@ package raw import ( "context" - "fmt" "time" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -35,7 +35,11 @@ func (p *rawPacket) loadReceivedAt(_ context.Context, nsec int64) { // afterLoad is invoked by stateify. func (e *endpoint) afterLoad(ctx context.Context) { - stack.RestoreStackFromContext(ctx).RegisterRestoredEndpoint(e) + if e.stack.IsSaveRestoreEnabled() { + e.stack.RegisterRestoredEndpoint(e) + } else { + stack.RestoreStackFromContext(ctx).RegisterRestoredEndpoint(e) + } } // beforeSave is invoked by stateify. @@ -46,16 +50,24 @@ func (e *endpoint) beforeSave() { // Restore implements tcpip.RestoredEndpoint.Restore. 
func (e *endpoint) Restore(s *stack.Stack) { - e.net.Resume(s) - + if err := e.net.Resume(s); err != nil { + log.Warningf("Closing the raw endpoint as it cannot be restored, err: %v", err) + e.Close() + return + } e.setReceiveDisabled(false) + if e.stack.IsSaveRestoreEnabled() { + e.ops.InitHandler(e, e.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits) + return + } + e.stack = s e.ops.InitHandler(e, e.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits) if e.associated { netProto := e.net.NetProto() if err := e.stack.RegisterRawTransportEndpoint(netProto, e.transProto, e); err != nil { - panic(fmt.Sprintf("e.stack.RegisterRawTransportEndpoint(%d, %d, _): %s", netProto, e.transProto, err)) + panic("RegisterRawTransportEndpoint failed during restore") } } } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/raw/raw_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/raw/raw_state_autogen.go index 0793ad18..b3155999 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/raw/raw_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/raw/raw_state_autogen.go @@ -29,8 +29,8 @@ func (p *rawPacket) beforeSave() {} // +checklocksignore func (p *rawPacket) StateSave(stateSinkObject state.Sink) { p.beforeSave() - var receivedAtValue int64 - receivedAtValue = p.saveReceivedAt() + receivedAtValue := p.saveReceivedAt() + _ = (int64)(receivedAtValue) stateSinkObject.SaveValue(2, receivedAtValue) stateSinkObject.Save(0, &p.rawPacketEntry) stateSinkObject.Save(1, &p.data) @@ -60,6 +60,7 @@ func (e *endpoint) StateTypeName() string { func (e *endpoint) StateFields() []string { return []string{ "DefaultSocketOptionsHandler", + "stack", "transProto", "waiterQueue", "associated", @@ -79,35 +80,37 @@ func (e *endpoint) StateFields() []string { func (e *endpoint) StateSave(stateSinkObject state.Sink) { e.beforeSave() stateSinkObject.Save(0, &e.DefaultSocketOptionsHandler) - stateSinkObject.Save(1, 
&e.transProto) - stateSinkObject.Save(2, &e.waiterQueue) - stateSinkObject.Save(3, &e.associated) - stateSinkObject.Save(4, &e.net) - stateSinkObject.Save(5, &e.stats) - stateSinkObject.Save(6, &e.ops) - stateSinkObject.Save(7, &e.rcvList) - stateSinkObject.Save(8, &e.rcvBufSize) - stateSinkObject.Save(9, &e.rcvClosed) - stateSinkObject.Save(10, &e.rcvDisabled) - stateSinkObject.Save(11, &e.ipv6ChecksumOffset) - stateSinkObject.Save(12, &e.icmpv6Filter) + stateSinkObject.Save(1, &e.stack) + stateSinkObject.Save(2, &e.transProto) + stateSinkObject.Save(3, &e.waiterQueue) + stateSinkObject.Save(4, &e.associated) + stateSinkObject.Save(5, &e.net) + stateSinkObject.Save(6, &e.stats) + stateSinkObject.Save(7, &e.ops) + stateSinkObject.Save(8, &e.rcvList) + stateSinkObject.Save(9, &e.rcvBufSize) + stateSinkObject.Save(10, &e.rcvClosed) + stateSinkObject.Save(11, &e.rcvDisabled) + stateSinkObject.Save(12, &e.ipv6ChecksumOffset) + stateSinkObject.Save(13, &e.icmpv6Filter) } // +checklocksignore func (e *endpoint) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(0, &e.DefaultSocketOptionsHandler) - stateSourceObject.Load(1, &e.transProto) - stateSourceObject.Load(2, &e.waiterQueue) - stateSourceObject.Load(3, &e.associated) - stateSourceObject.Load(4, &e.net) - stateSourceObject.Load(5, &e.stats) - stateSourceObject.Load(6, &e.ops) - stateSourceObject.Load(7, &e.rcvList) - stateSourceObject.Load(8, &e.rcvBufSize) - stateSourceObject.Load(9, &e.rcvClosed) - stateSourceObject.Load(10, &e.rcvDisabled) - stateSourceObject.Load(11, &e.ipv6ChecksumOffset) - stateSourceObject.Load(12, &e.icmpv6Filter) + stateSourceObject.Load(1, &e.stack) + stateSourceObject.Load(2, &e.transProto) + stateSourceObject.Load(3, &e.waiterQueue) + stateSourceObject.Load(4, &e.associated) + stateSourceObject.Load(5, &e.net) + stateSourceObject.Load(6, &e.stats) + stateSourceObject.Load(7, &e.ops) + stateSourceObject.Load(8, &e.rcvList) + stateSourceObject.Load(9, 
&e.rcvBufSize) + stateSourceObject.Load(10, &e.rcvClosed) + stateSourceObject.Load(11, &e.rcvDisabled) + stateSourceObject.Load(12, &e.ipv6ChecksumOffset) + stateSourceObject.Load(13, &e.icmpv6Filter) stateSourceObject.AfterLoad(func() { e.afterLoad(ctx) }) } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/accept.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/accept.go index adcfdcfd..264127d1 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/accept.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/accept.go @@ -23,7 +23,6 @@ import ( "io" "time" - "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/ports" @@ -88,7 +87,7 @@ type listenContext struct { listenEP *Endpoint // hasherMu protects hasher. - hasherMu sync.Mutex + hasherMu hasherMutex // hasher is the hash function used to generate a SYN cookie. hasher hash.Hash @@ -259,7 +258,7 @@ func (l *listenContext) startHandshake(s *segment, opts header.TCPSynOptions, qu // Propagate any inheritable options from the listening endpoint // to the newly created endpoint. 
- l.listenEP.propagateInheritableOptionsLocked(ep) // +checklocksforce + l.listenEP.propagateInheritableOptionsLocked(ep) // +checklocksforce:ep.mu if !ep.reserveTupleLocked() { ep.mu.Unlock() @@ -526,13 +525,14 @@ func (e *Endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err } cookie := ctx.createCookie(s.id, s.sequenceNumber, encodeMSS(opts.MSS)) fields := tcpFields{ - id: s.id, - ttl: calculateTTL(route, e.ipv4TTL, e.ipv6HopLimit), - tos: e.sendTOS, - flags: header.TCPFlagSyn | header.TCPFlagAck, - seq: cookie, - ack: s.sequenceNumber + 1, - rcvWnd: ctx.rcvWnd, + id: s.id, + ttl: calculateTTL(route, e.ipv4TTL, e.ipv6HopLimit), + tos: e.sendTOS, + flags: header.TCPFlagSyn | header.TCPFlagAck, + seq: cookie, + ack: s.sequenceNumber + 1, + rcvWnd: ctx.rcvWnd, + expOptVal: e.getExperimentOptionValue(route), } if err := e.sendSynTCP(route, fields, synOpts); err != nil { return err diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/accept_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/accept_mutex.go new file mode 100644 index 00000000..8c9da64e --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/accept_mutex.go @@ -0,0 +1,64 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type acceptMutex struct { + mu sync.Mutex +} + +var acceptprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var acceptlockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type acceptlockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. 
+// +checklocksignore +func (m *acceptMutex) Lock() { + locking.AddGLock(acceptprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *acceptMutex) NestedLock(i acceptlockNameIndex) { + locking.AddGLock(acceptprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *acceptMutex) Unlock() { + locking.DelGLock(acceptprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *acceptMutex) NestedUnlock(i acceptlockNameIndex) { + locking.DelGLock(acceptprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. +func acceptinitLockNames() {} + +func init() { + acceptinitLockNames() + acceptprefixIndex = locking.NewMutexClass(reflect.TypeFor[acceptMutex](), acceptlockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/connect.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/connect.go index 4125af98..76d34743 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/connect.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/connect.go @@ -40,10 +40,10 @@ const ( // https://github.com/torvalds/linux/blob/7c636d4d20f/include/net/tcp.h#L142 InitialRTO = time.Second - // maxSegmentsPerWake is the maximum number of segments to process in the main - // protocol goroutine per wake-up. Yielding [after this number of segments are - // processed] allows other events to be processed as well (e.g., timeouts, - // resets, etc.). + // maxSegmentsPerWake is the maximum number of segments to process per + // wake-up. Yielding [after this number of segments are processed] + // allows other events to be processed as well (e.g., timeouts, resets, + // etc.). 
maxSegmentsPerWake = 100 ) @@ -364,13 +364,14 @@ func (h *handshake) synSentState(s *segment) tcpip.Error { ttl = h.ep.route.DefaultTTL() } h.ep.sendSynTCP(h.ep.route, tcpFields{ - id: h.ep.TransportEndpointInfo.ID, - ttl: ttl, - tos: h.ep.sendTOS, - flags: h.flags, - seq: h.iss, - ack: h.ackNum, - rcvWnd: h.rcvWnd, + id: h.ep.TransportEndpointInfo.ID, + ttl: ttl, + tos: h.ep.sendTOS, + flags: h.flags, + seq: h.iss, + ack: h.ackNum, + rcvWnd: h.rcvWnd, + expOptVal: h.ep.getExperimentOptionValue(h.ep.route), }, synOpts) return nil } @@ -450,13 +451,14 @@ func (h *handshake) synRcvdState(s *segment) tcpip.Error { MSS: h.ep.amss, } h.ep.sendSynTCP(h.ep.route, tcpFields{ - id: h.ep.TransportEndpointInfo.ID, - ttl: calculateTTL(h.ep.route, h.ep.ipv4TTL, h.ep.ipv6HopLimit), - tos: h.ep.sendTOS, - flags: h.flags, - seq: h.iss, - ack: h.ackNum, - rcvWnd: h.rcvWnd, + id: h.ep.TransportEndpointInfo.ID, + ttl: calculateTTL(h.ep.route, h.ep.ipv4TTL, h.ep.ipv6HopLimit), + tos: h.ep.sendTOS, + flags: h.flags, + seq: h.iss, + ack: h.ackNum, + rcvWnd: h.rcvWnd, + expOptVal: h.ep.getExperimentOptionValue(h.ep.route), }, synOpts) return nil } @@ -543,7 +545,7 @@ func (h *handshake) processSegments() tcpip.Error { // We stop processing packets once the handshake is completed, // otherwise we may process packets meant to be processed by - // the main protocol goroutine. + // the TCP processor goroutine. 
if h.state == handshakeCompleted { break } @@ -587,13 +589,14 @@ func (h *handshake) start() { h.sendSYNOpts = synOpts h.ep.sendSynTCP(h.ep.route, tcpFields{ - id: h.ep.TransportEndpointInfo.ID, - ttl: calculateTTL(h.ep.route, h.ep.ipv4TTL, h.ep.ipv6HopLimit), - tos: h.ep.sendTOS, - flags: h.flags, - seq: h.iss, - ack: h.ackNum, - rcvWnd: h.rcvWnd, + id: h.ep.TransportEndpointInfo.ID, + ttl: calculateTTL(h.ep.route, h.ep.ipv4TTL, h.ep.ipv6HopLimit), + tos: h.ep.sendTOS, + flags: h.flags, + seq: h.iss, + ack: h.ackNum, + rcvWnd: h.rcvWnd, + expOptVal: h.ep.getExperimentOptionValue(h.ep.route), }, synOpts) } @@ -623,13 +626,14 @@ func (h *handshake) retransmitHandlerLocked() tcpip.Error { // retransmitted on their own). if h.active || !h.acked || h.deferAccept != 0 && e.stack.Clock().NowMonotonic().Sub(h.startTime) > h.deferAccept { e.sendSynTCP(e.route, tcpFields{ - id: e.TransportEndpointInfo.ID, - ttl: calculateTTL(e.route, e.ipv4TTL, e.ipv6HopLimit), - tos: e.sendTOS, - flags: h.flags, - seq: h.iss, - ack: h.ackNum, - rcvWnd: h.rcvWnd, + id: e.TransportEndpointInfo.ID, + ttl: calculateTTL(e.route, e.ipv4TTL, e.ipv6HopLimit), + tos: e.sendTOS, + flags: h.flags, + seq: h.iss, + ack: h.ackNum, + rcvWnd: h.rcvWnd, + expOptVal: e.getExperimentOptionValue(e.route), }, h.sendSYNOpts) // If we have ever retransmitted the SYN-ACK or // SYN segment, we should only measure RTT if @@ -639,7 +643,7 @@ func (h *handshake) retransmitHandlerLocked() tcpip.Error { return nil } -// transitionToStateEstablisedLocked transitions the endpoint of the handshake +// transitionToStateEstablishedLocked transitions the endpoint of the handshake // to an established state given the last segment received from peer. It also // initializes sender/receiver. // +checklocks:h.ep.mu @@ -652,7 +656,7 @@ func (h *handshake) transitionToStateEstablishedLocked(s *segment) { // Transfer handshake state to TCP connection. 
We disable // receive window scaling if the peer doesn't support it // (indicated by a negative send window scale). - h.ep.snd = newSender(h.ep, h.iss, h.ackNum-1, h.sndWnd, h.mss, h.sndWndScale) + initSender(h.ep, h.iss, h.ackNum-1, h.sndWnd, h.mss, h.sndWndScale) now := h.ep.stack.Clock().NowMonotonic() @@ -800,22 +804,27 @@ func makeSynOptions(opts header.TCPSynOptions) []byte { // tcpFields is a struct to carry different parameters required by the // send*TCP variant functions below. type tcpFields struct { - id stack.TransportEndpointID - ttl uint8 - tos uint8 - flags header.TCPFlags - seq seqnum.Value - ack seqnum.Value - rcvWnd seqnum.Size - opts []byte - txHash uint32 - df bool + id stack.TransportEndpointID + ttl uint8 + tos uint8 + flags header.TCPFlags + seq seqnum.Value + ack seqnum.Value + rcvWnd seqnum.Size + opts []byte + txHash uint32 + df bool + expOptVal uint16 } func (e *Endpoint) sendSynTCP(r *stack.Route, tf tcpFields, opts header.TCPSynOptions) tcpip.Error { tf.opts = makeSynOptions(opts) // We ignore SYN send errors and let the callers re-attempt send. - p := stack.NewPacketBuffer(stack.PacketBufferOptions{ReserveHeaderBytes: header.TCPMinimumSize + int(r.MaxHeaderLength()) + len(tf.opts)}) + hdrSize := header.TCPMinimumSize + int(r.MaxHeaderLength()) + len(tf.opts) + if r.NetProto() == header.IPv6ProtocolNumber && tf.expOptVal != 0 { + hdrSize += header.IPv6ExperimentHdrLength + } + p := stack.NewPacketBuffer(stack.PacketBufferOptions{ReserveHeaderBytes: hdrSize}) defer p.DecRef() if err := e.sendTCP(r, tf, p, stack.GSO{}); err != nil { e.stats.SendErrors.SynSendToNetworkFailed.Increment() @@ -887,6 +896,10 @@ func sendTCPBatch(r *stack.Route, tf tcpFields, pkt *stack.PacketBuffer, gso sta // packet already has the truncated data. shouldSplitPacket := i != n-1 if shouldSplitPacket { + if r.NetProto() == header.IPv6ProtocolNumber && tf.expOptVal != 0 { + // Reserve extra bytes for the experiment option. 
+ hdrSize += header.IPv6ExperimentHdrLength + } splitPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ReserveHeaderBytes: hdrSize}) splitPkt.Data().ReadFromPacketData(pkt.Data(), packetSize) pkt = splitPkt @@ -897,7 +910,13 @@ func sendTCPBatch(r *stack.Route, tf tcpFields, pkt *stack.PacketBuffer, gso sta buildTCPHdr(r, tf, pkt, gso) tf.seq = tf.seq.Add(seqnum.Size(packetSize)) pkt.GSOOptions = gso - if err := r.WritePacket(stack.NetworkHeaderParams{Protocol: ProtocolNumber, TTL: tf.ttl, TOS: tf.tos, DF: tf.df}, pkt); err != nil { + if err := r.WritePacket(stack.NetworkHeaderParams{ + Protocol: ProtocolNumber, + TTL: tf.ttl, + TOS: tf.tos, + DF: tf.df, + ExperimentOptionValue: tf.expOptVal, + }, pkt); err != nil { r.Stats().TCP.SegmentSendErrors.Increment() if shouldSplitPacket { pkt.DecRef() @@ -929,7 +948,13 @@ func sendTCP(r *stack.Route, tf tcpFields, pkt *stack.PacketBuffer, gso stack.GS pkt.Owner = owner buildTCPHdr(r, tf, pkt, gso) - if err := r.WritePacket(stack.NetworkHeaderParams{Protocol: ProtocolNumber, TTL: tf.ttl, TOS: tf.tos, DF: tf.df}, pkt); err != nil { + if err := r.WritePacket(stack.NetworkHeaderParams{ + Protocol: ProtocolNumber, + TTL: tf.ttl, + TOS: tf.tos, + DF: tf.df, + ExperimentOptionValue: tf.expOptVal, + }, pkt); err != nil { r.Stats().TCP.SegmentSendErrors.Increment() return err } @@ -982,7 +1007,6 @@ func (e *Endpoint) makeOptions(sackBlocks []header.SACKBlock) []byte { // sendEmptyRaw sends a TCP segment with no payload to the endpoint's peer. // // +checklocks:e.mu -// +checklocksalias:e.snd.ep.mu=e.mu func (e *Endpoint) sendEmptyRaw(flags header.TCPFlags, seq, ack seqnum.Value, rcvWnd seqnum.Size) tcpip.Error { pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{}) defer pkt.DecRef() @@ -993,7 +1017,6 @@ func (e *Endpoint) sendEmptyRaw(flags header.TCPFlags, seq, ack seqnum.Value, rc // ownership of pkt. pkt must not have any headers set. 
// // +checklocks:e.mu -// +checklocksalias:e.snd.ep.mu=e.mu func (e *Endpoint) sendRaw(pkt *stack.PacketBuffer, flags header.TCPFlags, seq, ack seqnum.Value, rcvWnd seqnum.Size) tcpip.Error { var sackBlocks []header.SACKBlock if e.EndpointState() == StateEstablished && e.rcv.pendingRcvdSegments.Len() > 0 && (flags&header.TCPFlagAck != 0) { @@ -1001,7 +1024,12 @@ func (e *Endpoint) sendRaw(pkt *stack.PacketBuffer, flags header.TCPFlags, seq, } options := e.makeOptions(sackBlocks) defer putOptions(options) - pkt.ReserveHeaderBytes(header.TCPMinimumSize + int(e.route.MaxHeaderLength()) + len(options)) + hdrSize := header.TCPMinimumSize + int(e.route.MaxHeaderLength()) + len(options) + expOptVal := e.getExperimentOptionValue(e.route) + if e.route.NetProto() == header.IPv6ProtocolNumber && expOptVal != 0 { + hdrSize += header.IPv6ExperimentHdrLength + } + pkt.ReserveHeaderBytes(hdrSize) return e.sendTCP(e.route, tcpFields{ id: e.TransportEndpointInfo.ID, ttl: calculateTTL(e.route, e.ipv4TTL, e.ipv6HopLimit), @@ -1011,12 +1039,13 @@ func (e *Endpoint) sendRaw(pkt *stack.PacketBuffer, flags header.TCPFlags, seq, ack: ack, rcvWnd: rcvWnd, opts: options, - df: e.pmtud == tcpip.PMTUDiscoveryWant || e.pmtud == tcpip.PMTUDiscoveryDo, + // PROBE sets DF like DO; see network/endpoint.go for details. + df: e.pmtud == tcpip.PMTUDiscoveryWant || e.pmtud == tcpip.PMTUDiscoveryDo || e.pmtud == tcpip.PMTUDiscoveryProbe, + expOptVal: expOptVal, }, pkt, e.gso) } // +checklocks:e.mu -// +checklocksalias:e.snd.ep.mu=e.mu func (e *Endpoint) sendData(next *segment) { // Initialize the next segment to write if it's currently nil. if e.snd.writeNext == nil { @@ -1032,8 +1061,7 @@ func (e *Endpoint) sendData(next *segment) { // resetConnectionLocked puts the endpoint in an error state with the given // error code and sends a RST if and only if the error is not ErrConnectionReset -// indicating that the connection is being reset due to receiving a RST. 
This -// method must only be called from the protocol goroutine. +// indicating that the connection is being reset due to receiving a RST. // +checklocks:e.mu func (e *Endpoint) resetConnectionLocked(err tcpip.Error) { // Only send a reset if the connection is being aborted for a reason @@ -1049,12 +1077,26 @@ func (e *Endpoint) resetConnectionLocked(err tcpip.Error) { // // See: https://www.snellman.net/blog/archive/2016-02-01-tcp-rst/ for more // information. - sndWndEnd := e.snd.SndUna.Add(e.snd.SndWnd) - resetSeqNum := sndWndEnd - if !sndWndEnd.LessThan(e.snd.SndNxt) || e.snd.SndNxt.Size(sndWndEnd) < (1< 0 { // If the window was small before this read and if the read freed up // enough buffer space, to either fit an aMSS or half a receive buffer - // (whichever smaller), then notify the protocol goroutine to send a - // window update. + // (whichever smaller), then send a window update. if crossed, above := e.windowCrossedACKThresholdLocked(memDelta, int(e.ops.GetReceiveBufferSize())); crossed && above { sendNonZeroWindowUpdate = true } @@ -1459,7 +1472,7 @@ func (e *Endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult e.rcvQueueMu.Unlock() if e.EndpointState().connected() && sendNonZeroWindowUpdate { - e.rcv.nonZeroWindow() // +checklocksforce:e.rcv.ep.mu + e.rcv.nonZeroWindow() } } @@ -1821,7 +1834,7 @@ func (e *Endpoint) OnSetReceiveBufferSize(rcvBufSz, oldSz int64) (newSz int64, p e.LockUser() defer e.UnlockUser() if e.EndpointState().connected() && sendNonZeroWindowUpdate { - e.rcv.nonZeroWindow() // +checklocksforce:e.rcv.ep.mu + e.rcv.nonZeroWindow() } } @@ -1888,15 +1901,15 @@ func (e *Endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error { e.UnlockUser() case tcpip.MTUDiscoverOption: + // PROBE is accepted alongside DO/WANT/DONT. In Linux, + // PROBE sets DF but ignores ICMP-based PMTU updates; + // since gVisor lacks ICMP PMTU feedback, it behaves + // identically to DO. 
switch v := tcpip.PMTUDStrategy(v); v { - case tcpip.PMTUDiscoveryWant, tcpip.PMTUDiscoveryDont, tcpip.PMTUDiscoveryDo: + case tcpip.PMTUDiscoveryWant, tcpip.PMTUDiscoveryDont, tcpip.PMTUDiscoveryDo, tcpip.PMTUDiscoveryProbe: e.LockUser() e.pmtud = v e.UnlockUser() - case tcpip.PMTUDiscoveryProbe: - // We don't support a way to ignore MTU updates; it's - // either on or it's off. - return &tcpip.ErrNotSupported{} default: return &tcpip.ErrNotSupported{} } @@ -2466,20 +2479,31 @@ func (e *Endpoint) connect(addr tcpip.FullAddress, handshake bool) tcpip.Error { e.effectiveNetProtos = []tcpip.NetworkProtocolNumber{netProto} e.connectingAddress = connectingAddr + if e.alsoBindToV4 { + // If the endpoint was bound to `any` address the port will be + // reserved for both IPv4 and IPv6 addresses. Release the port + // reservation for the IPv4 address here so that the future bind + // for IPv4 socket will not fail. + portRes := ports.Reservation{ + Networks: []tcpip.NetworkProtocolNumber{header.IPv4ProtocolNumber}, + Transport: ProtocolNumber, + Port: e.TransportEndpointInfo.ID.LocalPort, + } + e.stack.ReleasePort(portRes) + } + e.initGSO() // Connect in the restore phase does not perform handshake. Restore its // connection setting here. 
if !handshake { e.segmentQueue.mu.Lock() - for _, l := range []segmentList{e.segmentQueue.list, e.snd.writeList} { + for _, l := range []segmentList{e.segmentQueue.list, e.snd.writeList.writeList} { for s := l.Front(); s != nil; s = s.Next() { s.id = e.TransportEndpointInfo.ID - e.sndQueueInfo.sndWaker.Assert() } } e.segmentQueue.mu.Unlock() - e.snd.ep.AssertLockHeld(e) e.snd.updateMaxPayloadSize(int(e.route.MTU()), 0) e.setEndpointState(StateEstablished) // Set the new auto tuned send buffer size after entering @@ -2755,6 +2779,7 @@ func (e *Endpoint) bindLocked(addr tcpip.FullAddress) (err tcpip.Error) { alsoBindToV4 := !e.ops.GetV6Only() && addr.Addr == tcpip.Address{} && stackHasV4 if alsoBindToV4 { netProtos = append(netProtos, header.IPv4ProtocolNumber) + e.alsoBindToV4 = true } } @@ -2904,7 +2929,8 @@ func (e *Endpoint) onICMPError(err tcpip.Error, transErr stack.TransportError, p if e.EndpointState().connecting() { e.mu.Lock() - if lEP := e.h.listenEP; lEP != nil { + if e.h != nil && e.h.listenEP != nil { + lEP := e.h.listenEP // Remove from listening endpoints pending list. lEP.acceptMu.Lock() delete(lEP.acceptQueue.pendingEndpoints, e) @@ -2936,7 +2962,7 @@ func (e *Endpoint) HandleError(transErr stack.TransportError, pkt *stack.PacketB e.mu.Lock() defer e.mu.Unlock() if e.snd != nil { - e.snd.updateMaxPayloadSize(newMTU, 1 /* count */) // +checklocksforce:e.snd.ep.mu + e.snd.updateMaxPayloadSize(newMTU, 1 /* count */) } } } @@ -2962,8 +2988,10 @@ func (e *Endpoint) HandleError(transErr stack.TransportError, pkt *stack.PacketB } } -// updateSndBufferUsage is called by the protocol goroutine when room opens up -// in the send buffer. The number of newly available bytes is v. +// updateSndBufferUsage is called by when room opens up in the send buffer. The +// number of newly available bytes is v. 
+// +// +checklocks:e.mu func (e *Endpoint) updateSndBufferUsage(v int) { sendBufferSize := e.getSendBufferSize() e.sndQueueInfo.sndQueueMu.Lock() @@ -2987,9 +3015,8 @@ func (e *Endpoint) updateSndBufferUsage(v int) { } } -// readyToRead is called by the protocol goroutine when a new segment is ready -// to be read, or when the connection is closed for receiving (in which case -// s will be nil). +// readyToRead is called when a new segment is ready to be read, or when the +// connection is closed for receiving (in which case s will be nil). // // +checklocks:e.mu func (e *Endpoint) readyToRead(s *segment) { @@ -3146,9 +3173,9 @@ func (e *Endpoint) maxOptionSize() (size int) { // used before invoking the probe. // // +checklocks:e.mu -func (e *Endpoint) completeStateLocked(s *stack.TCPEndpointState) { +func (e *Endpoint) completeStateLocked(s *TCPEndpointState) { s.TCPEndpointStateInner = e.TCPEndpointStateInner - s.ID = stack.TCPEndpointID(e.TransportEndpointInfo.ID) + s.ID = TCPEndpointID(e.TransportEndpointInfo.ID) s.SegTime = e.stack.Clock().NowMonotonic() s.Receiver = e.rcv.TCPReceiverState s.Sender = e.snd.TCPSenderState @@ -3254,11 +3281,7 @@ func GetTCPSendBufferLimits(sh tcpip.StackHandler) tcpip.SendBufferSizeOption { // This type assertion is safe because only the TCP stack calls this // function. ss := sh.(*stack.Stack).TCPSendBufferLimits() - return tcpip.SendBufferSizeOption{ - Min: ss.Min, - Default: ss.Default, - Max: ss.Max, - } + return tcpip.SendBufferSizeOption(ss) } // allowOutOfWindowAck returns true if an out-of-window ACK can be sent now. 
@@ -3286,15 +3309,13 @@ func GetTCPReceiveBufferLimits(s tcpip.StackHandler) tcpip.ReceiveBufferSizeOpti panic(fmt.Sprintf("s.TransportProtocolOption(%d, %#v) = %s", header.TCPProtocolNumber, ss, err)) } - return tcpip.ReceiveBufferSizeOption{ - Min: ss.Min, - Default: ss.Default, - Max: ss.Max, - } + return tcpip.ReceiveBufferSizeOption(ss) } // computeTCPSendBufferSize implements auto tuning of send buffer size and // returns the new send buffer size. +// +// +checklocks:e.mu func (e *Endpoint) computeTCPSendBufferSize() int64 { curSndBufSz := int64(e.getSendBufferSize()) @@ -3330,3 +3351,12 @@ func (e *Endpoint) computeTCPSendBufferSize() int64 { func (e *Endpoint) GetAcceptConn() bool { return EndpointState(e.State()) == StateListen } + +// getExperimentOptionValue returns the experiment option value set on the +// endpoint if experiment IP options are enabled on outgoing NIC of the route. +func (e *Endpoint) getExperimentOptionValue(route *stack.Route) uint16 { + if nic, err := e.stack.GetNICByID(route.OutgoingNIC()); err == nil && nic.GetExperimentIPOptionEnabled() { + return e.ops.GetExperimentOptionValue() + } + return 0 +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/endpoint_state.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/endpoint_state.go index 63457f7f..05cf7919 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/endpoint_state.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/endpoint_state.go @@ -19,13 +19,24 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/ports" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" "gvisor.dev/gvisor/pkg/tcpip/stack" ) +// logDisconnectOnce ensures we don't spam logs when many connections are terminated. 
+var logDisconnectOnce sync.Once + +func logDisconnect() { + logDisconnectOnce.Do(func() { + log.Infof("One or more TCP connections terminated during save restore") + }) +} + // beforeSave is invoked by stateify. func (e *Endpoint) beforeSave() { // Stop incoming packets. @@ -38,16 +49,22 @@ func (e *Endpoint) beforeSave() { switch { case epState == StateInitial || epState == StateBound: case epState.connected() || epState.handshake(): + // Terminate valid connections only for restore. if !e.route.HasSaveRestoreCapability() { - if !e.route.HasDisconnectOkCapability() { - panic(&tcpip.ErrSaveRejection{ - Err: fmt.Errorf("endpoint cannot be saved in connected state: local %s:%d, remote %s:%d", e.TransportEndpointInfo.ID.LocalAddress, e.TransportEndpointInfo.ID.LocalPort, e.TransportEndpointInfo.ID.RemoteAddress, e.TransportEndpointInfo.ID.RemotePort), - }) + if e.stack.GetRemoveConf() { + // Terminate the endpoint when resume=false. + logDisconnect() + e.terminateAtRestore = false + e.resetConnectionLocked(&tcpip.ErrConnectionAborted{}) + e.mu.Unlock() + e.Close() + e.mu.Lock() + } else { + // This is set only when resume=true, the termination + // of this endpoint will happen during restore of the + // saved snapshot. + e.terminateAtRestore = true } - e.resetConnectionLocked(&tcpip.ErrConnectionAborted{}) - e.mu.Unlock() - e.Close() - e.mu.Lock() } fallthrough case epState == StateListen: @@ -95,8 +112,9 @@ var connectingLoading sync.WaitGroup func (e *Endpoint) loadState(_ context.Context, epState EndpointState) { // This is to ensure that the loading wait groups include all applicable // endpoints before any asynchronous calls to the Wait() methods. - // For restore purposes we treat TimeWait like a connected endpoint. - if epState.connected() || epState == StateTimeWait { + // For restore purposes we treat all endpoints with state after + // StateEstablished and before StateClosed like connected endpoint. 
+ if epState.connected() { connectedLoading.Add(1) } switch { @@ -118,7 +136,46 @@ func (e *Endpoint) afterLoad(ctx context.Context) { // Restore the endpoint to InitialState as it will be moved to // its origEndpointState during Restore. e.state = atomicbitops.FromUint32(uint32(StateInitial)) - stack.RestoreStackFromContext(ctx).RegisterRestoredEndpoint(e) + if e.stack.IsSaveRestoreEnabled() { + e.stack.RegisterRestoredEndpoint(e) + } else { + stack.RestoreStackFromContext(ctx).RegisterRestoredEndpoint(e) + } +} + +// Close the endpoint during restore if terminateAtRestore was set for the endpoint. +func (e *Endpoint) closeEndpointAtRestore() { + e.mu.Lock() + defer e.mu.Unlock() + + epState := EndpointState(e.origEndpointState) + if !epState.connected() && !epState.handshake() { + log.Debugf("endpoint was marked to terminate at restore in a wrong state, ID: %+v state: %v", e.ID, epState) + return + } + + if epState.handshake() { + connectedLoading.Wait() + listenLoading.Wait() + } + + // Put the endpoint in the error state and do cleanup. Do not + // attempt to send RST as route will be nil. + e.purgeReadQueue() + if epState.connected() { + e.purgeWriteQueue() + e.purgePendingRcvQueue() + e.cleanupLocked() + } + e.state.Store(uint32(StateError)) + e.closeNoShutdownLocked() + tcpip.DeleteDanglingEndpoint(e) + + if epState.connected() { + connectedLoading.Done() + } else { + connectingLoading.Done() + } } // Restore implements tcpip.RestoredEndpoint.Restore. 
@@ -132,29 +189,39 @@ func (e *Endpoint) Restore(s *stack.Stack) { snd.probeTimer.init(s.Clock(), timerHandler(e, e.snd.probeTimerExpired)) snd.corkTimer.init(s.Clock(), timerHandler(e, e.snd.corkTimerExpired)) } - e.stack = s - e.protocol = protocolFromStack(s) + saveRestoreEnabled := e.stack.IsSaveRestoreEnabled() + if !saveRestoreEnabled { + e.stack = s + e.protocol = protocolFromStack(s) + } e.ops.InitHandler(e, e.stack, GetTCPSendBufferLimits, GetTCPReceiveBufferLimits) e.segmentQueue.thaw() + e.mu.Lock() + id := e.ID + terminateAtRestore := e.terminateAtRestore + e.mu.Unlock() + bind := func() { e.mu.Lock() defer e.mu.Unlock() - addr, _, err := e.checkV4MappedLocked(tcpip.FullAddress{Addr: e.BindAddr, Port: e.TransportEndpointInfo.ID.LocalPort}, true /* bind */) - if err != nil { - panic("unable to parse BindAddr: " + err.String()) - } - portRes := ports.Reservation{ - Networks: e.effectiveNetProtos, - Transport: ProtocolNumber, - Addr: addr.Addr, - Port: addr.Port, - Flags: e.boundPortFlags, - BindToDevice: e.boundBindToDevice, - Dest: e.boundDest, - } - if ok := e.stack.ReserveTuple(portRes); !ok { - panic(fmt.Sprintf("unable to re-reserve tuple (%v, %q, %d, %+v, %d, %v)", e.effectiveNetProtos, addr.Addr, addr.Port, e.boundPortFlags, e.boundBindToDevice, e.boundDest)) + if !saveRestoreEnabled { + addr, _, err := e.checkV4MappedLocked(tcpip.FullAddress{Addr: e.BindAddr, Port: e.TransportEndpointInfo.ID.LocalPort}, true /* bind */) + if err != nil { + panic("unable to parse BindAddr: " + err.String()) + } + portRes := ports.Reservation{ + Networks: e.effectiveNetProtos, + Transport: ProtocolNumber, + Addr: addr.Addr, + Port: addr.Port, + Flags: e.boundPortFlags, + BindToDevice: e.boundBindToDevice, + Dest: e.boundDest, + } + if ok := e.stack.ReserveTuple(portRes); !ok { + panic(fmt.Sprintf("unable to re-reserve tuple (%v, %q, %d, %+v, %d, %v)", e.effectiveNetProtos, addr.Addr, addr.Port, e.boundPortFlags, e.boundBindToDevice, e.boundDest)) + } } 
e.isPortReserved = true @@ -162,6 +229,11 @@ func (e *Endpoint) Restore(s *stack.Stack) { e.setEndpointState(StateBound) } + if terminateAtRestore { + e.closeEndpointAtRestore() + return + } + epState := EndpointState(e.origEndpointState) switch { case epState.connected(): @@ -182,10 +254,18 @@ func (e *Endpoint) Restore(s *stack.Stack) { // Reset the scoreboard to reinitialize the sack information as // we do not restore SACK information. e.scoreboard.Reset() + if saveRestoreEnabled { + // Unregister the endpoint before registering again during Connect. + e.stack.UnregisterTransportEndpoint(e.effectiveNetProtos, header.TCPProtocolNumber, e.TransportEndpointInfo.ID, e, e.boundPortFlags, e.boundBindToDevice) + } e.mu.Lock() err := e.connect(tcpip.FullAddress{NIC: e.boundNICID, Addr: e.connectingAddress, Port: e.TransportEndpointInfo.ID.RemotePort}, false /* handshake */) if _, ok := err.(*tcpip.ErrConnectStarted); !ok { - panic("endpoint connecting failed: " + err.String()) + log.Warningf("TCP endpoint connect failed for connected endpoint with ID: %+v err: %v", id, err) + e.mu.Unlock() + e.Close() + connectedLoading.Done() + return } e.state.Store(e.origEndpointState) // For FIN-WAIT-2 and TIME-WAIT we need to start the appropriate timers so @@ -206,23 +286,39 @@ func (e *Endpoint) Restore(s *stack.Stack) { connectedLoading.Done() case epState == StateListen: tcpip.AsyncLoading.Add(1) - go func() { - connectedLoading.Wait() - bind() - e.acceptMu.Lock() - backlog := e.acceptQueue.capacity - e.acceptMu.Unlock() - if err := e.Listen(backlog); err != nil { - panic("endpoint listening failed: " + err.String()) - } - e.LockUser() - if e.shutdownFlags != 0 { - e.shutdownLocked(e.shutdownFlags) - } - e.UnlockUser() - listenLoading.Done() - tcpip.AsyncLoading.Done() - }() + if !saveRestoreEnabled { + go func() { + connectedLoading.Wait() + bind() + e.acceptMu.Lock() + backlog := e.acceptQueue.capacity + e.acceptMu.Unlock() + if err := e.Listen(backlog); err != nil { + 
panic("endpoint listening failed: " + err.String()) + } + e.LockUser() + if e.shutdownFlags != 0 { + e.shutdownLocked(e.shutdownFlags) + } + e.UnlockUser() + listenLoading.Done() + tcpip.AsyncLoading.Done() + }() + } else { + go func() { + connectedLoading.Wait() + e.LockUser() + // All endpoints will be moved to initial state after + // restore. Set endpoint to its originial listen state. + e.setEndpointState(StateListen) + // Initialize the listening context. + rcvWnd := seqnum.Size(e.receiveBufferAvailable()) + e.listenCtx = newListenContext(e.stack, e.protocol, e, rcvWnd, e.ops.GetV6Only(), e.NetProto) + e.UnlockUser() + listenLoading.Done() + tcpip.AsyncLoading.Done() + }() + } case epState == StateConnecting: // Initial SYN hasn't been sent yet so initiate a connect. tcpip.AsyncLoading.Add(1) @@ -232,32 +328,42 @@ func (e *Endpoint) Restore(s *stack.Stack) { bind() err := e.Connect(tcpip.FullAddress{NIC: e.boundNICID, Addr: e.connectingAddress, Port: e.TransportEndpointInfo.ID.RemotePort}) if _, ok := err.(*tcpip.ErrConnectStarted); !ok { - panic("endpoint connecting failed: " + err.String()) + log.Warningf("TCP endpoint connect failed for connecting endpoint with ID: %+v err: %v", id, err) + e.Close() } connectingLoading.Done() tcpip.AsyncLoading.Done() }() case epState == StateSynSent || epState == StateSynRecv: - connectedLoading.Wait() - listenLoading.Wait() - // Initial SYN has been sent/received so we should bind the - // ports start the retransmit timer for the SYNs and let it - // naturally complete the connection. 
- bind() - e.mu.Lock() - defer e.mu.Unlock() - e.setEndpointState(epState) - r, err := e.stack.FindRoute(e.boundNICID, e.TransportEndpointInfo.ID.LocalAddress, e.TransportEndpointInfo.ID.RemoteAddress, e.effectiveNetProtos[0], false /* multicastLoop */) - if err != nil { - panic(fmt.Sprintf("FindRoute failed when restoring endpoint w/ ID: %+v", e.ID)) - } - e.route = r - timer, err := newBackoffTimer(e.stack.Clock(), InitialRTO, MaxRTO, timerHandler(e, e.h.retransmitHandlerLocked)) - if err != nil { - panic(fmt.Sprintf("newBackOffTimer(_, %s, %s, _) failed: %s", InitialRTO, MaxRTO, err)) - } - e.h.retransmitTimer = timer - connectingLoading.Done() + tcpip.AsyncLoading.Add(1) + go func() { + connectedLoading.Wait() + listenLoading.Wait() + // Initial SYN has been sent/received so we should bind the + // ports start the retransmit timer for the SYNs and let it + // naturally complete the connection. + bind() + e.mu.Lock() + e.setEndpointState(epState) + r, err := e.stack.FindRoute(e.boundNICID, e.TransportEndpointInfo.ID.LocalAddress, e.TransportEndpointInfo.ID.RemoteAddress, e.effectiveNetProtos[0], false /* multicastLoop */) + if err != nil { + e.mu.Unlock() + log.Warningf("FindRoute failed when restoring endpoint w/ ID: %+v err: %v", id, err) + e.Close() + connectingLoading.Done() + tcpip.AsyncLoading.Done() + return + } + e.route = r + timer, err := newBackoffTimer(e.stack.Clock(), InitialRTO, MaxRTO, timerHandler(e, e.h.retransmitHandlerLocked)) + if err != nil { + panic(fmt.Sprintf("newBackOffTimer(_, %s, %s, _) failed: %s", InitialRTO, MaxRTO, err)) + } + e.h.retransmitTimer = timer + connectingLoading.Done() + tcpip.AsyncLoading.Done() + e.mu.Unlock() + }() case epState == StateBound: tcpip.AsyncLoading.Add(1) go func() { diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/ep_queue_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/ep_queue_mutex.go new file mode 100644 index 00000000..c334b2c6 --- /dev/null +++ 
b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/ep_queue_mutex.go @@ -0,0 +1,64 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type epQueueMutex struct { + mu sync.Mutex +} + +var epQueueprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var epQueuelockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type epQueuelockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. +// +checklocksignore +func (m *epQueueMutex) Lock() { + locking.AddGLock(epQueueprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *epQueueMutex) NestedLock(i epQueuelockNameIndex) { + locking.AddGLock(epQueueprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *epQueueMutex) Unlock() { + locking.DelGLock(epQueueprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *epQueueMutex) NestedUnlock(i epQueuelockNameIndex) { + locking.DelGLock(epQueueprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. 
+func epQueueinitLockNames() {} + +func init() { + epQueueinitLockNames() + epQueueprefixIndex = locking.NewMutexClass(reflect.TypeFor[epQueueMutex](), epQueuelockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/forwarder.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/forwarder.go index 39a52215..261a3b57 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/forwarder.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/forwarder.go @@ -15,7 +15,6 @@ package tcp import ( - "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" @@ -35,7 +34,7 @@ type Forwarder struct { maxInFlight int handler func(*ForwarderRequest) - mu sync.Mutex + mu forwarderMutex inFlight map[stack.TransportEndpointID]struct{} listen *listenContext } @@ -108,7 +107,7 @@ func (f *Forwarder) HandlePacket(id stack.TransportEndpointID, pkt *stack.Packet // and passed to the client. Clients must eventually call Complete() on it, and // may optionally create an endpoint to represent it via CreateEndpoint. type ForwarderRequest struct { - mu sync.Mutex + mu forwarderRequestMutex forwarder *Forwarder segment *segment synOptions header.TCPSynOptions @@ -170,3 +169,12 @@ func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, return ep, nil } + +// ForwardedPacketExperimentOption returns the experiment option value from the +// forwarded packet and a bool indicating whether an experiment option value was +// found. 
+func (r *ForwarderRequest) ForwardedPacketExperimentOption() (uint16, bool) { + r.mu.Lock() + defer r.mu.Unlock() + return r.segment.pkt.ExperimentOptionValue() +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/forwarder_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/forwarder_mutex.go new file mode 100644 index 00000000..b81e9efa --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/forwarder_mutex.go @@ -0,0 +1,64 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type forwarderMutex struct { + mu sync.Mutex +} + +var forwarderprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var forwarderlockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type forwarderlockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. +// +checklocksignore +func (m *forwarderMutex) Lock() { + locking.AddGLock(forwarderprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *forwarderMutex) NestedLock(i forwarderlockNameIndex) { + locking.AddGLock(forwarderprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *forwarderMutex) Unlock() { + locking.DelGLock(forwarderprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. 
+// +checklocksignore +func (m *forwarderMutex) NestedUnlock(i forwarderlockNameIndex) { + locking.DelGLock(forwarderprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. +func forwarderinitLockNames() {} + +func init() { + forwarderinitLockNames() + forwarderprefixIndex = locking.NewMutexClass(reflect.TypeFor[forwarderMutex](), forwarderlockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/forwarder_request_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/forwarder_request_mutex.go new file mode 100644 index 00000000..23fdcb2e --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/forwarder_request_mutex.go @@ -0,0 +1,64 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type forwarderRequestMutex struct { + mu sync.Mutex +} + +var forwarderRequestprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var forwarderRequestlockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type forwarderRequestlockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. +// +checklocksignore +func (m *forwarderRequestMutex) Lock() { + locking.AddGLock(forwarderRequestprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *forwarderRequestMutex) NestedLock(i forwarderRequestlockNameIndex) { + locking.AddGLock(forwarderRequestprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. 
+// +checklocksignore +func (m *forwarderRequestMutex) Unlock() { + locking.DelGLock(forwarderRequestprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *forwarderRequestMutex) NestedUnlock(i forwarderRequestlockNameIndex) { + locking.DelGLock(forwarderRequestprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. +func forwarderRequestinitLockNames() {} + +func init() { + forwarderRequestinitLockNames() + forwarderRequestprefixIndex = locking.NewMutexClass(reflect.TypeFor[forwarderRequestMutex](), forwarderRequestlockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/hasher_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/hasher_mutex.go new file mode 100644 index 00000000..947365e7 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/hasher_mutex.go @@ -0,0 +1,64 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type hasherMutex struct { + mu sync.Mutex +} + +var hasherprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var hasherlockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type hasherlockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. +// +checklocksignore +func (m *hasherMutex) Lock() { + locking.AddGLock(hasherprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. 
+// +checklocksignore +func (m *hasherMutex) NestedLock(i hasherlockNameIndex) { + locking.AddGLock(hasherprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *hasherMutex) Unlock() { + locking.DelGLock(hasherprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *hasherMutex) NestedUnlock(i hasherlockNameIndex) { + locking.DelGLock(hasherprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. +func hasherinitLockNames() {} + +func init() { + hasherinitLockNames() + hasherprefixIndex = locking.NewMutexClass(reflect.TypeFor[hasherMutex](), hasherlockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/keepalive_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/keepalive_mutex.go new file mode 100644 index 00000000..3922e30f --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/keepalive_mutex.go @@ -0,0 +1,64 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type keepaliveMutex struct { + mu sync.Mutex +} + +var keepaliveprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var keepalivelockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type keepalivelockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. 
+// +checklocksignore +func (m *keepaliveMutex) Lock() { + locking.AddGLock(keepaliveprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *keepaliveMutex) NestedLock(i keepalivelockNameIndex) { + locking.AddGLock(keepaliveprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *keepaliveMutex) Unlock() { + locking.DelGLock(keepaliveprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *keepaliveMutex) NestedUnlock(i keepalivelockNameIndex) { + locking.DelGLock(keepaliveprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. +func keepaliveinitLockNames() {} + +func init() { + keepaliveinitLockNames() + keepaliveprefixIndex = locking.NewMutexClass(reflect.TypeFor[keepaliveMutex](), keepalivelockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/last_error_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/last_error_mutex.go new file mode 100644 index 00000000..c3603c28 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/last_error_mutex.go @@ -0,0 +1,64 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type lastErrorMutex struct { + mu sync.Mutex +} + +var lastErrorprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var lastErrorlockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type lastErrorlockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. 
+// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. +// +checklocksignore +func (m *lastErrorMutex) Lock() { + locking.AddGLock(lastErrorprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *lastErrorMutex) NestedLock(i lastErrorlockNameIndex) { + locking.AddGLock(lastErrorprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *lastErrorMutex) Unlock() { + locking.DelGLock(lastErrorprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *lastErrorMutex) NestedUnlock(i lastErrorlockNameIndex) { + locking.DelGLock(lastErrorprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. +func lastErrorinitLockNames() {} + +func init() { + lastErrorinitLockNames() + lastErrorprefixIndex = locking.NewMutexClass(reflect.TypeFor[lastErrorMutex](), lastErrorlockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/pending_processing_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/pending_processing_mutex.go new file mode 100644 index 00000000..24bf8a07 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/pending_processing_mutex.go @@ -0,0 +1,64 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type pendingProcessingMutex struct { + mu sync.Mutex +} + +var pendingProcessingprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var pendingProcessinglockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. 
+type pendingProcessinglockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. +// +checklocksignore +func (m *pendingProcessingMutex) Lock() { + locking.AddGLock(pendingProcessingprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *pendingProcessingMutex) NestedLock(i pendingProcessinglockNameIndex) { + locking.AddGLock(pendingProcessingprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *pendingProcessingMutex) Unlock() { + locking.DelGLock(pendingProcessingprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *pendingProcessingMutex) NestedUnlock(i pendingProcessinglockNameIndex) { + locking.DelGLock(pendingProcessingprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. 
+func pendingProcessinginitLockNames() {} + +func init() { + pendingProcessinginitLockNames() + pendingProcessingprefixIndex = locking.NewMutexClass(reflect.TypeFor[pendingProcessingMutex](), pendingProcessinglockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/protocol.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/protocol.go index 73829ac4..ec80705d 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/protocol.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/protocol.go @@ -23,7 +23,6 @@ import ( "strings" "time" - "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/header/parse" @@ -90,7 +89,7 @@ const ( type protocol struct { stack *stack.Stack - mu sync.RWMutex `state:"nosave"` + mu protocolRWMutex `state:"nosave"` sackEnabled bool recovery tcpip.TCPRecovery delayEnabled bool @@ -109,6 +108,12 @@ type protocol struct { synRetries uint8 dispatcher dispatcher + // probe, if not nil, will be invoked any time an endpoint receives a + // TCP segment. + // + // This is immutable after creation. + probe TCPProbeFunc `state:"nosave"` + // The following secrets are initialized once and stay unchanged after. 
seqnumSecret [16]byte tsOffsetSecret [16]byte @@ -227,16 +232,26 @@ func replyWithReset(st *stack.Stack, s *segment, tos, ipv4TTL uint8, ipv6HopLimi ack = s.sequenceNumber.Add(s.logicalLen()) } - p := stack.NewPacketBuffer(stack.PacketBufferOptions{ReserveHeaderBytes: header.TCPMinimumSize + int(route.MaxHeaderLength())}) + var expOptVal uint16 + if s.ep != nil { + expOptVal = s.ep.getExperimentOptionValue(route) + } + hdrSize := header.TCPMinimumSize + int(route.MaxHeaderLength()) + if route.NetProto() == header.IPv6ProtocolNumber && expOptVal != 0 { + hdrSize += header.IPv6ExperimentHdrLength + } + p := stack.NewPacketBuffer(stack.PacketBufferOptions{ReserveHeaderBytes: hdrSize}) defer p.DecRef() + return sendTCP(route, tcpFields{ - id: s.id, - ttl: ttl, - tos: tos, - flags: flags, - seq: seq, - ack: ack, - rcvWnd: 0, + id: s.id, + ttl: ttl, + tos: tos, + flags: flags, + seq: seq, + ack: ack, + rcvWnd: 0, + expOptVal: expOptVal, }, p, stack.GSO{}, nil /* PacketOwner */) } @@ -508,6 +523,11 @@ func (p *protocol) Resume() { p.dispatcher.resume() } +// Restore implements stack.TransportProtocol.Restore. +func (p *protocol) Restore() { + p.dispatcher.start() +} + // Parse implements stack.TransportProtocol.Parse. func (*protocol) Parse(pkt *stack.PacketBuffer) bool { return parse.TCP(pkt) @@ -515,7 +535,19 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool { // NewProtocol returns a TCP transport protocol with Reno congestion control. func NewProtocol(s *stack.Stack) stack.TransportProtocol { - return newProtocol(s, ccReno) + return newProtocol(s, ccReno, nil) +} + +// NewProtocolProbe returns a TCP transport protocol with Reno congestion +// control and the given probe. +// +// The probe will be invoked on every segment received by TCP endpoints. The +// probe function is passed a copy of the TCP endpoint state before and after +// processing of the segment. 
+func NewProtocolProbe(probe TCPProbeFunc) func(*stack.Stack) stack.TransportProtocol { + return func(s *stack.Stack) stack.TransportProtocol { + return newProtocol(s, ccReno, probe) + } } // NewProtocolCUBIC returns a TCP transport protocol with CUBIC congestion @@ -523,10 +555,10 @@ func NewProtocol(s *stack.Stack) stack.TransportProtocol { // // TODO(b/345835636): Remove this and make CUBIC the default across the board. func NewProtocolCUBIC(s *stack.Stack) stack.TransportProtocol { - return newProtocol(s, ccCubic) + return newProtocol(s, ccCubic, nil) } -func newProtocol(s *stack.Stack, cc string) stack.TransportProtocol { +func newProtocol(s *stack.Stack, cc string, probe TCPProbeFunc) stack.TransportProtocol { rng := s.SecureRNG() var seqnumSecret [16]byte var tsOffsetSecret [16]byte @@ -562,6 +594,7 @@ func newProtocol(s *stack.Stack, cc string) stack.TransportProtocol { recovery: tcpip.TCPRACKLossDetection, seqnumSecret: seqnumSecret, tsOffsetSecret: tsOffsetSecret, + probe: probe, } p.dispatcher.init(s.InsecureRNG(), runtime.GOMAXPROCS(0)) return &p diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/protocol_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/protocol_mutex.go new file mode 100644 index 00000000..e1a272e8 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/protocol_mutex.go @@ -0,0 +1,96 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// RWMutex is sync.RWMutex with the correctness validator. +type protocolRWMutex struct { + mu sync.RWMutex +} + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var protocollockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. 
+type protocollockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. +// +checklocksignore +func (m *protocolRWMutex) Lock() { + locking.AddGLock(protocolprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *protocolRWMutex) NestedLock(i protocollockNameIndex) { + locking.AddGLock(protocolprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *protocolRWMutex) Unlock() { + m.mu.Unlock() + locking.DelGLock(protocolprefixIndex, -1) +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *protocolRWMutex) NestedUnlock(i protocollockNameIndex) { + m.mu.Unlock() + locking.DelGLock(protocolprefixIndex, int(i)) +} + +// RLock locks m for reading. +// +checklocksignore +func (m *protocolRWMutex) RLock() { + locking.AddGLock(protocolprefixIndex, -1) + m.mu.RLock() +} + +// RUnlock undoes a single RLock call. +// +checklocksignore +func (m *protocolRWMutex) RUnlock() { + m.mu.RUnlock() + locking.DelGLock(protocolprefixIndex, -1) +} + +// RLockBypass locks m for reading without executing the validator. +// +checklocksignore +func (m *protocolRWMutex) RLockBypass() { + m.mu.RLock() +} + +// RUnlockBypass undoes a single RLockBypass call. +// +checklocksignore +func (m *protocolRWMutex) RUnlockBypass() { + m.mu.RUnlock() +} + +// DowngradeLock atomically unlocks rw for writing and locks it for reading. +// +checklocksignore +func (m *protocolRWMutex) DowngradeLock() { + m.mu.DowngradeLock() +} + +var protocolprefixIndex *locking.MutexClass + +// DO NOT REMOVE: The following function is automatically replaced. 
+func protocolinitLockNames() {} + +func init() { + protocolinitLockNames() + protocolprefixIndex = locking.NewMutexClass(reflect.TypeFor[protocolRWMutex](), protocollockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rack.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rack.go index 66ea6e5b..fd80dd77 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rack.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rack.go @@ -19,7 +19,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/seqnum" - "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( @@ -47,7 +46,7 @@ const ( // // +stateify savable type rackControl struct { - stack.TCPRACKState + TCPRACKState // exitedRecovery indicates if the connection is exiting loss recovery. // This flag is set if the sender is leaving the recovery after @@ -162,6 +161,8 @@ func (s *sender) shouldSchedulePTO() bool { // schedulePTO schedules the probe timeout as defined in // https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.5.1. +// +// +checklocks:s.ep.mu func (s *sender) schedulePTO() { pto := time.Second s.rtt.Lock() @@ -237,6 +238,8 @@ func (s *sender) probeTimerExpired() tcpip.Error { // detectTLPRecovery detects if recovery was accomplished by the loss probes // and updates TLP state accordingly. // See https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.3. +// +// +checklocks:s.ep.mu func (s *sender) detectTLPRecovery(ack seqnum.Value, rcvdSeg *segment) { if !(s.ep.SACKPermitted && s.rc.tlpRxtOut) { return @@ -280,6 +283,8 @@ func (s *sender) detectTLPRecovery(ack seqnum.Value, rcvdSeg *segment) { // been observed RACK uses reo_wnd of zero during loss recovery, in order to // retransmit quickly, or when the number of DUPACKs exceeds the classic // DUPACKthreshold. 
+// +// +checklocks:rc.snd.ep.mu func (rc *rackControl) updateRACKReorderWindow() { dsackSeen := rc.DSACKSeen snd := rc.snd @@ -353,6 +358,8 @@ func (rc *rackControl) exitRecovery() { // detectLoss marks the segment as lost if the reordering window has elapsed // and the ACK is not received. It will also arm the reorder timer. // See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2 Step 5. +// +// +checklocks:rc.snd.ep.mu func (rc *rackControl) detectLoss(rcvTime tcpip.MonotonicTime) int { var timeout time.Duration numLost := 0 diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rcv.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rcv.go index 349f950f..a0f63854 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rcv.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rcv.go @@ -21,7 +21,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" - "gvisor.dev/gvisor/pkg/tcpip/stack" ) // receiver holds the state necessary to receive TCP segments and turn them @@ -29,7 +28,7 @@ import ( // // +stateify savable type receiver struct { - stack.TCPReceiverState + TCPReceiverState ep *Endpoint // rcvWnd is the non-scaled receive window last advertised to the peer. @@ -55,7 +54,7 @@ type receiver struct { func newReceiver(ep *Endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8) *receiver { return &receiver{ ep: ep, - TCPReceiverState: stack.TCPReceiverState{ + TCPReceiverState: TCPReceiverState{ RcvNxt: irs + 1, RcvAcc: irs.Add(rcvWnd + 1), RcvWndScale: rcvWndScale, @@ -187,7 +186,6 @@ func (r *receiver) getSendParams() (RcvNxt seqnum.Value, rcvWnd seqnum.Size) { // in such cases we may need to send an ack to indicate to our peer that it can // resume sending data. // +checklocks:r.ep.mu -// +checklocksalias:r.ep.snd.ep.mu=r.ep.mu func (r *receiver) nonZeroWindow() { // Immediately send an ack. 
r.ep.snd.sendAck() @@ -200,7 +198,6 @@ func (r *receiver) nonZeroWindow() { // Returns true if the segment was consumed, false if it cannot be consumed // yet because of a missing segment. // +checklocks:r.ep.mu -// +checklocksalias:r.ep.snd.ep.mu=r.ep.mu func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum.Size) bool { if segLen > 0 { // If the segment doesn't include the seqnum we're expecting to @@ -354,7 +351,6 @@ func (r *receiver) updateRTT() { } // +checklocks:r.ep.mu -// +checklocksalias:r.ep.snd.ep.mu=r.ep.mu func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, closed bool) (drop bool, err tcpip.Error) { r.ep.rcvQueueMu.Lock() rcvClosed := r.ep.RcvClosed || r.closed @@ -452,7 +448,6 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo // handleRcvdSegment handles TCP segments directed at the connection managed by // r as they arrive. It is called by the protocol main loop. // +checklocks:r.ep.mu -// +checklocksalias:r.ep.snd.ep.mu=r.ep.mu func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err tcpip.Error) { state := r.ep.EndpointState() closed := r.ep.closed @@ -548,7 +543,6 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err tcpip.Error) { // handleTimeWaitSegment handles inbound segments received when the endpoint // has entered the TIME_WAIT state. 
// +checklocks:r.ep.mu -// +checklocksalias:r.ep.snd.ep.mu=r.ep.mu func (r *receiver) handleTimeWaitSegment(s *segment) (resetTimeWait bool, newSyn bool) { segSeq := s.sequenceNumber segLen := seqnum.Size(s.payloadSize()) diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rcv_queue_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rcv_queue_mutex.go new file mode 100644 index 00000000..2ab4f03e --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rcv_queue_mutex.go @@ -0,0 +1,64 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type rcvQueueMutex struct { + mu sync.Mutex +} + +var rcvQueueprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var rcvQueuelockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type rcvQueuelockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. +// +checklocksignore +func (m *rcvQueueMutex) Lock() { + locking.AddGLock(rcvQueueprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *rcvQueueMutex) NestedLock(i rcvQueuelockNameIndex) { + locking.AddGLock(rcvQueueprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *rcvQueueMutex) Unlock() { + locking.DelGLock(rcvQueueprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. 
+// +checklocksignore +func (m *rcvQueueMutex) NestedUnlock(i rcvQueuelockNameIndex) { + locking.DelGLock(rcvQueueprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. +func rcvQueueinitLockNames() {} + +func init() { + rcvQueueinitLockNames() + rcvQueueprefixIndex = locking.NewMutexClass(reflect.TypeFor[rcvQueueMutex](), rcvQueuelockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/reno.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/reno.go index 2d1b011d..0f137a32 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/reno.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/reno.go @@ -35,6 +35,8 @@ func newRenoCC(s *sender) *renoState { // algorithm used by NewReno. If after adjusting the congestion window // we cross the SSthreshold then it will return the number of packets that // must be consumed in congestion avoidance mode. +// +// +checklocks:r.s.ep.mu func (r *renoState) updateSlowStart(packetsAcked int) int { // Don't let the congestion window cross into the congestion // avoidance range. @@ -51,6 +53,8 @@ func (r *renoState) updateSlowStart(packetsAcked int) int { // updateCongestionAvoidance will update congestion window in congestion // avoidance mode as described in RFC5681 section 3.1 +// +// +checklocks:r.s.ep.mu func (r *renoState) updateCongestionAvoidance(packetsAcked int) { // Consume the packets in congestion avoidance mode. r.s.SndCAAckCount += packetsAcked @@ -62,6 +66,8 @@ func (r *renoState) updateCongestionAvoidance(packetsAcked int) { // reduceSlowStartThreshold reduces the slow-start threshold per RFC 5681, // page 6, eq. 4. It is called when we detect congestion in the network. 
+// +// +checklocks:r.s.ep.mu func (r *renoState) reduceSlowStartThreshold() { r.s.Ssthresh = r.s.Outstanding / 2 if r.s.Ssthresh < 2 { @@ -73,6 +79,8 @@ func (r *renoState) reduceSlowStartThreshold() { // Update updates the congestion state based on the number of packets that // were acknowledged. // Update implements congestionControl.Update. +// +// +checklocks:r.s.ep.mu func (r *renoState) Update(packetsAcked int, _ time.Duration) { if r.s.SndCwnd < r.s.Ssthresh { packetsAcked = r.updateSlowStart(packetsAcked) @@ -84,6 +92,8 @@ func (r *renoState) Update(packetsAcked int, _ time.Duration) { } // HandleLossDetected implements congestionControl.HandleLossDetected. +// +// +checklocks:r.s.ep.mu func (r *renoState) HandleLossDetected() { // A retransmit was triggered due to nDupAckThreshold or when RACK // detected loss. Reduce our slow start threshold. @@ -91,6 +101,8 @@ func (r *renoState) HandleLossDetected() { } // HandleRTOExpired implements congestionControl.HandleRTOExpired. +// +// +checklocks:r.s.ep.mu func (r *renoState) HandleRTOExpired() { // We lost a packet, so reduce ssthresh. r.reduceSlowStartThreshold() diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rtt_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rtt_mutex.go new file mode 100644 index 00000000..1c63af54 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/rtt_mutex.go @@ -0,0 +1,64 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type rttMutex struct { + mu sync.Mutex +} + +var rttprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var rttlockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. 
+type rttlockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. +// +checklocksignore +func (m *rttMutex) Lock() { + locking.AddGLock(rttprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *rttMutex) NestedLock(i rttlockNameIndex) { + locking.AddGLock(rttprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *rttMutex) Unlock() { + locking.DelGLock(rttprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *rttMutex) NestedUnlock(i rttlockNameIndex) { + locking.DelGLock(rttprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. +func rttinitLockNames() {} + +func init() { + rttinitLockNames() + rttprefixIndex = locking.NewMutexClass(reflect.TypeFor[rttMutex](), rttlockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/sack_scoreboard.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/sack_scoreboard.go index fb7f4e3f..74713ec6 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/sack_scoreboard.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/sack_scoreboard.go @@ -34,6 +34,12 @@ const ( defaultBtreeDegree = 2 ) +// sackBlockLess is the comparison function for BTreeG, replacing the +// btree.Item interface method. +func sackBlockLess(a, b header.SACKBlock) bool { + return a.Start.LessThan(b.Start) +} + // SACKScoreboard stores a set of disjoint SACK ranges. // // +stateify savable @@ -47,22 +53,22 @@ type SACKScoreboard struct { // the TCP/IP headers and options. 
smss uint16 maxSACKED seqnum.Value - sacked seqnum.Size `state:"nosave"` - ranges *btree.BTree `state:"nosave"` + sacked seqnum.Size `state:"nosave"` + ranges *btree.BTreeG[header.SACKBlock] `state:"nosave"` } // NewSACKScoreboard returns a new SACK Scoreboard. func NewSACKScoreboard(smss uint16, iss seqnum.Value) *SACKScoreboard { return &SACKScoreboard{ smss: smss, - ranges: btree.New(defaultBtreeDegree), + ranges: btree.NewG[header.SACKBlock](defaultBtreeDegree, sackBlockLess), maxSACKED: iss, } } // Reset erases all known range information from the SACK scoreboard. func (s *SACKScoreboard) Reset() { - s.ranges = btree.New(defaultBtreeDegree) + s.ranges = btree.NewG[header.SACKBlock](defaultBtreeDegree, sackBlockLess) s.sacked = 0 } @@ -73,15 +79,14 @@ func (s *SACKScoreboard) Insert(r header.SACKBlock) { } // Check if we can merge the new range with a range before or after it. - var toDelete []btree.Item + var toDelete []header.SACKBlock if s.maxSACKED.LessThan(r.End - 1) { s.maxSACKED = r.End - 1 } - s.ranges.AscendGreaterOrEqual(r, func(i btree.Item) bool { - if i == r { + s.ranges.AscendGreaterOrEqual(r, func(sacked header.SACKBlock) bool { + if sacked == r { return true } - sacked := i.(header.SACKBlock) // There is a hole between these two SACK blocks, so we can't // merge anymore. if r.End.LessThan(sacked.Start) { @@ -96,21 +101,20 @@ func (s *SACKScoreboard) Insert(r header.SACKBlock) { if sacked.End.LessThan(r.End) { // sacked is contained in the newly inserted range. // Delete this block. - toDelete = append(toDelete, i) + toDelete = append(toDelete, sacked) return true } // sacked covers a range past end of the newly inserted // block. 
r.End = sacked.End - toDelete = append(toDelete, i) + toDelete = append(toDelete, sacked) return true }) - s.ranges.DescendLessOrEqual(r, func(i btree.Item) bool { - if i == r { + s.ranges.DescendLessOrEqual(r, func(sacked header.SACKBlock) bool { + if sacked == r { return true } - sacked := i.(header.SACKBlock) // sA------sE // rA----rE if sacked.End.LessThan(r.Start) { @@ -126,18 +130,17 @@ func (s *SACKScoreboard) Insert(r header.SACKBlock) { if r.End.LessThan(sacked.End) { r.End = sacked.End } - toDelete = append(toDelete, i) + toDelete = append(toDelete, sacked) return true }) - for _, i := range toDelete { - if sb := s.ranges.Delete(i); sb != nil { - sb := i.(header.SACKBlock) + for _, sb := range toDelete { + if _, ok := s.ranges.Delete(sb); ok { s.sacked -= sb.Start.Size(sb.End) } } - replaced := s.ranges.ReplaceOrInsert(r) - if replaced == nil { + _, replaced := s.ranges.ReplaceOrInsert(r) + if !replaced { s.sacked += r.Start.Size(r.End) } } @@ -150,8 +153,7 @@ func (s *SACKScoreboard) IsSACKED(r header.SACKBlock) bool { } found := false - s.ranges.DescendLessOrEqual(r, func(i btree.Item) bool { - sacked := i.(header.SACKBlock) + s.ranges.DescendLessOrEqual(r, func(sacked header.SACKBlock) bool { if sacked.End.LessThan(r.Start) { return false } @@ -168,8 +170,8 @@ func (s *SACKScoreboard) IsSACKED(r header.SACKBlock) bool { func (s *SACKScoreboard) String() string { var str strings.Builder str.WriteString("SACKScoreboard: {") - s.ranges.Ascend(func(i btree.Item) bool { - str.WriteString(fmt.Sprintf("%v,", i)) + s.ranges.Ascend(func(sb header.SACKBlock) bool { + fmt.Fprintf(&str, "%v,", sb) return true }) str.WriteString("}\n") @@ -181,15 +183,14 @@ func (s *SACKScoreboard) Delete(seq seqnum.Value) { if s.Empty() { return } - toDelete := []btree.Item{} - toInsert := []btree.Item{} + var toDelete []header.SACKBlock + var toInsert []header.SACKBlock r := header.SACKBlock{seq, seq.Add(1)} - s.ranges.DescendLessOrEqual(r, func(i btree.Item) bool { - if i == r { 
+ s.ranges.DescendLessOrEqual(r, func(sb header.SACKBlock) bool { + if sb == r { return true } - sb := i.(header.SACKBlock) - toDelete = append(toDelete, i) + toDelete = append(toDelete, sb) if sb.End.LessThanEq(seq) { s.sacked -= sb.Start.Size(sb.End) } else { @@ -209,8 +210,8 @@ func (s *SACKScoreboard) Delete(seq seqnum.Value) { // Copy provides a copy of the SACK scoreboard. func (s *SACKScoreboard) Copy() (sackBlocks []header.SACKBlock, maxSACKED seqnum.Value) { - s.ranges.Ascend(func(i btree.Item) bool { - sackBlocks = append(sackBlocks, i.(header.SACKBlock)) + s.ranges.Ascend(func(sb header.SACKBlock) bool { + sackBlocks = append(sackBlocks, sb) return true }) return sackBlocks, s.maxSACKED @@ -232,8 +233,7 @@ func (s *SACKScoreboard) IsRangeLost(r header.SACKBlock) bool { // We need to check if the immediate lower (if any) sacked // range contains or partially overlaps with r. searchMore := true - s.ranges.DescendLessOrEqual(r, func(i btree.Item) bool { - sacked := i.(header.SACKBlock) + s.ranges.DescendLessOrEqual(r, func(sacked header.SACKBlock) bool { if sacked.Contains(r) { searchMore = false return false @@ -256,8 +256,7 @@ func (s *SACKScoreboard) IsRangeLost(r header.SACKBlock) bool { return isLost } - s.ranges.AscendGreaterOrEqual(r, func(i btree.Item) bool { - sacked := i.(header.SACKBlock) + s.ranges.AscendGreaterOrEqual(r, func(sacked header.SACKBlock) bool { if sacked.Contains(r) { return false } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/segment.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/segment.go index 6de583da..a05a0767 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/segment.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/segment.go @@ -130,7 +130,7 @@ func newIncomingSegment(id stack.TransportEndpointID, clock tcpip.Clock, pkt *st s.window = seqnum.Size(hdr.WindowSize()) s.rcvdTime = clock.NowMonotonic() s.dataMemSize = pkt.MemSize() - s.pkt = pkt.IncRef() + s.pkt = pkt.Clone() 
s.csumValid = csumValid if !s.pkt.RXChecksumValidated { diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/segment_queue.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/segment_queue.go index 6f003efc..c7669b06 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/segment_queue.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/segment_queue.go @@ -14,16 +14,12 @@ package tcp -import ( - "gvisor.dev/gvisor/pkg/sync" -) - // segmentQueue is a bounded, thread-safe queue of TCP segments. // // +stateify savable type segmentQueue struct { - mu sync.Mutex `state:"nosave"` - list segmentList `state:"wait"` + mu segmentQueueMutex `state:"nosave"` + list segmentList `state:"wait"` ep *Endpoint frozen bool } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/segment_queue_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/segment_queue_mutex.go new file mode 100644 index 00000000..59a25f51 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/segment_queue_mutex.go @@ -0,0 +1,64 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type segmentQueueMutex struct { + mu sync.Mutex +} + +var segmentQueueprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var segmentQueuelockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type segmentQueuelockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. 
+// +checklocksignore +func (m *segmentQueueMutex) Lock() { + locking.AddGLock(segmentQueueprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *segmentQueueMutex) NestedLock(i segmentQueuelockNameIndex) { + locking.AddGLock(segmentQueueprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *segmentQueueMutex) Unlock() { + locking.DelGLock(segmentQueueprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *segmentQueueMutex) NestedUnlock(i segmentQueuelockNameIndex) { + locking.DelGLock(segmentQueueprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. +func segmentQueueinitLockNames() {} + +func init() { + segmentQueueinitLockNames() + segmentQueueprefixIndex = locking.NewMutexClass(reflect.TypeFor[segmentQueueMutex](), segmentQueuelockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/snd.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/snd.go index eb5beea0..6caedef0 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/snd.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/snd.go @@ -21,7 +21,6 @@ import ( "time" "gvisor.dev/gvisor/pkg/buffer" - "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" @@ -97,9 +96,12 @@ type lossRecovery interface { // sender holds the state necessary to send TCP segments. // +// +checklocksalias:rc.snd.ep.mu=ep.mu // +stateify savable type sender struct { - stack.TCPSenderState + // +checklocks:ep.mu + TCPSenderState + ep *Endpoint // lr is the loss recovery algorithm used by the sender. @@ -124,7 +126,9 @@ type sender struct { // writeList holds all writable data: both unsent data and // sent-but-unacknowledged data. 
Alternatively: it holds all bytes // starting from SND.UNA. - writeList segmentList + // + // +checklocks:ep.mu + writeList protectedWriteList // resendTimer is used for RTOs. resendTimer timer `state:"nosave"` @@ -180,26 +184,74 @@ type sender struct { corkTimer timer `state:"nosave"` } +// protectedWriteList wraps the write list, checking for invalid state when +// segments are added or removed. +// +// TODO(b/339664055): Revert once bug is fixed. +// +// +stateify savable +type protectedWriteList struct { + writeList segmentList + set map[*segment]struct{} +} + +// Front returns the front of the write list. +func (wl *protectedWriteList) Front() *segment { + return wl.writeList.Front() +} + +// Back returns the back of the write list. +func (wl *protectedWriteList) Back() *segment { + return wl.writeList.Back() +} + +// Remove removes seg from the write list. +func (wl *protectedWriteList) Remove(seg *segment) { + if _, ok := wl.set[seg]; !ok { + panic("segment not found write list") + } + wl.writeList.Remove(seg) + delete(wl.set, seg) +} + +// PushBack pushes seg onto the back of the write list. +func (wl *protectedWriteList) PushBack(seg *segment) { + if _, ok := wl.set[seg]; ok { + panic("segment already in write list") + } + wl.writeList.PushBack(seg) + wl.set[seg] = struct{}{} +} + +// InsertAfter inserts seg after before. +func (wl *protectedWriteList) InsertAfter(before, seg *segment) { + if _, ok := wl.set[seg]; ok { + panic("segment already in write list") + } + wl.writeList.InsertAfter(before, seg) + wl.set[seg] = struct{}{} +} + // rtt is a synchronization wrapper used to appease stateify. See the comment // in sender, where it is used. 
// // +stateify savable type rtt struct { - sync.Mutex `state:"nosave"` + rttMutex `state:"nosave"` - stack.TCPRTTState + TCPRTTState } // +checklocks:ep.mu -func newSender(ep *Endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint16, sndWndScale int) *sender { +func initSender(ep *Endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint16, sndWndScale int) { // The sender MUST reduce the TCP data length to account for any IP or // TCP options that it is including in the packets that it sends. // See: https://tools.ietf.org/html/rfc6691#section-2 maxPayloadSize := int(mss) - ep.maxOptionSize() - s := &sender{ + ep.snd = &sender{ ep: ep, - TCPSenderState: stack.TCPSenderState{ + TCPSenderState: TCPSenderState{ SndWnd: sndWnd, SndUna: iss + 1, SndNxt: iss + 1, @@ -207,7 +259,7 @@ func newSender(ep *Endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint LastSendTime: ep.stack.Clock().NowMonotonic(), MaxPayloadSize: maxPayloadSize, MaxSentAck: irs + 1, - FastRecovery: stack.TCPFastRecoveryState{ + FastRecovery: TCPFastRecoveryState{ // See: https://tools.ietf.org/html/rfc6582#section-3.2 Step 1. Last: iss, HighRxt: iss, @@ -216,59 +268,61 @@ func newSender(ep *Endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint RTO: 1 * time.Second, }, gso: ep.gso.Type != stack.GSONone, + writeList: protectedWriteList{ + set: make(map[*segment]struct{}), + }, } - if s.gso { - s.ep.gso.MSS = uint16(maxPayloadSize) + if ep.snd.gso { + ep.snd.ep.gso.MSS = uint16(maxPayloadSize) } - s.cc = s.initCongestionControl(ep.cc) - s.lr = s.initLossRecovery() - s.rc.init(s, iss) + ep.snd.cc = ep.snd.initCongestionControl(ep.cc) + ep.snd.lr = ep.snd.initLossRecovery() + ep.snd.rc.init(ep.snd, iss) // A negative sndWndScale means that no scaling is in use, otherwise we // store the scaling value. 
if sndWndScale > 0 { - s.SndWndScale = uint8(sndWndScale) + ep.snd.SndWndScale = uint8(sndWndScale) } - s.resendTimer.init(s.ep.stack.Clock(), timerHandler(s.ep, s.retransmitTimerExpired)) - s.reorderTimer.init(s.ep.stack.Clock(), timerHandler(s.ep, s.rc.reorderTimerExpired)) - s.probeTimer.init(s.ep.stack.Clock(), timerHandler(s.ep, s.probeTimerExpired)) - s.corkTimer.init(s.ep.stack.Clock(), timerHandler(s.ep, s.corkTimerExpired)) + ep.snd.resendTimer.init(ep.snd.ep.stack.Clock(), timerHandler(ep.snd.ep, ep.snd.retransmitTimerExpired)) + ep.snd.reorderTimer.init(ep.snd.ep.stack.Clock(), timerHandler(ep.snd.ep, ep.snd.rc.reorderTimerExpired)) + ep.snd.probeTimer.init(ep.snd.ep.stack.Clock(), timerHandler(ep.snd.ep, ep.snd.probeTimerExpired)) + ep.snd.corkTimer.init(ep.snd.ep.stack.Clock(), timerHandler(ep.snd.ep, ep.snd.corkTimerExpired)) - s.ep.AssertLockHeld(ep) - s.updateMaxPayloadSize(int(ep.route.MTU()), 0) + ep.snd.updateMaxPayloadSize(int(ep.snd.ep.route.MTU()), 0) // Initialize SACK Scoreboard after updating max payload size as we use // the maxPayloadSize as the smss when determining if a segment is lost // etc. - s.ep.scoreboard = NewSACKScoreboard(uint16(s.MaxPayloadSize), iss) + ep.snd.ep.scoreboard = NewSACKScoreboard(uint16(ep.snd.MaxPayloadSize), iss) // Get Stack wide config. 
var minRTO tcpip.TCPMinRTOOption - if err := ep.stack.TransportProtocolOption(ProtocolNumber, &minRTO); err != nil { + if err := ep.snd.ep.stack.TransportProtocolOption(ProtocolNumber, &minRTO); err != nil { panic(fmt.Sprintf("unable to get minRTO from stack: %s", err)) } - s.minRTO = time.Duration(minRTO) + ep.snd.minRTO = time.Duration(minRTO) var maxRTO tcpip.TCPMaxRTOOption - if err := ep.stack.TransportProtocolOption(ProtocolNumber, &maxRTO); err != nil { + if err := ep.snd.ep.stack.TransportProtocolOption(ProtocolNumber, &maxRTO); err != nil { panic(fmt.Sprintf("unable to get maxRTO from stack: %s", err)) } - s.maxRTO = time.Duration(maxRTO) + ep.snd.maxRTO = time.Duration(maxRTO) var maxRetries tcpip.TCPMaxRetriesOption - if err := ep.stack.TransportProtocolOption(ProtocolNumber, &maxRetries); err != nil { + if err := ep.snd.ep.stack.TransportProtocolOption(ProtocolNumber, &maxRetries); err != nil { panic(fmt.Sprintf("unable to get maxRetries from stack: %s", err)) } - s.maxRetries = uint32(maxRetries) - - return s + ep.snd.maxRetries = uint32(maxRetries) } // initCongestionControl initializes the specified congestion control module and // returns a handle to it. It also initializes the sndCwnd and sndSsThresh to // their initial values. +// +// +checklocks:s.ep.mu func (s *sender) initCongestionControl(congestionControlName tcpip.CongestionControlOption) congestionControl { s.SndCwnd = InitialCwnd s.Ssthresh = InitialSsthresh @@ -369,6 +423,8 @@ func (s *sender) sendAck() { // updateRTO updates the retransmit timeout when a new roud-trip time is // available. This is done in accordance with section 2 of RFC 6298. 
+// +// +checklocks:s.ep.mu func (s *sender) updateRTO(rtt time.Duration) { s.rtt.Lock() if !s.rtt.TCPRTTState.SRTTInited { @@ -418,6 +474,7 @@ func (s *sender) updateRTO(rtt time.Duration) { } s.RTO = s.rtt.TCPRTTState.SRTT + 4*s.rtt.TCPRTTState.RTTVar + s.RTTState = s.rtt.TCPRTTState s.rtt.Unlock() if s.RTO < s.minRTO { s.RTO = s.minRTO @@ -614,6 +671,8 @@ func (s *sender) pCount(seg *segment, maxPayloadSize int) int { // splitSeg splits a given segment at the size specified and inserts the // remainder as a new segment after the current one in the write list. +// +// +checklocks:s.ep.mu func (s *sender) splitSeg(seg *segment, size int) { if seg.payloadSize() <= size { return @@ -649,6 +708,8 @@ func (s *sender) splitSeg(seg *segment, size int) { // // rescueRtx will be true only if nextSeg is a rescue retransmission as // described by Step 4) of the NextSeg algorithm. +// +// +checklocks:s.ep.mu func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRtx bool) { var s3 *segment var s4 *segment @@ -905,17 +966,19 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se if seg.payloadSize() > available { // A negative value causes splitSeg to panic anyways, so just panic // earlier to get more information about the cause. - // TOOD(b/357457079): Remove this panic once the cause of negative values - // of "available" is understood. - if available < 0 { - panic(fmt.Sprintf("got available=%d, want available>=0. limit %d, s.MaxPayloadSize %d, seg.payloadSize() %d, gso.MaxSize %d, gso.MSS %d", available, limit, s.MaxPayloadSize, seg.payloadSize(), s.ep.gso.MaxSize, s.ep.gso.MSS)) - } s.splitSeg(seg, available) } segEnd = seg.sequenceNumber.Add(seqnum.Size(seg.payloadSize())) } + // TODO(b/379932042): Below is the only place we update SND.NXT besides + // initialization. It's possible that we're increasing SND.NXT by + // trying to write a segment that isn't in the write list. 
+ if _, ok := s.writeList.set[seg]; !ok { + panic("attempted to send segment not in write list") + } + s.sendSegment(seg) // Update sndNxt if we actually sent new data (as opposed to @@ -950,6 +1013,7 @@ func (s *sender) sendZeroWindowProbe() { s.resendTimer.enable(s.RTO) } +// +checklocks:s.ep.mu func (s *sender) enableZeroWindowProbing() { s.zeroWindowProbing = true // We piggyback the probing on the retransmit timer with the @@ -968,6 +1032,7 @@ func (s *sender) disableZeroWindowProbing() { s.resendTimer.disable() } +// +checklocks:s.ep.mu func (s *sender) postXmit(dataSent bool, shouldScheduleProbe bool) { if dataSent { // We sent data, so we should stop the keepalive timer to ensure @@ -1023,9 +1088,10 @@ func (s *sender) sendData() { var dataSent bool for seg := s.writeNext; seg != nil && s.Outstanding < s.SndCwnd; seg = seg.Next() { - cwndLimit := (s.SndCwnd - s.Outstanding) * s.MaxPayloadSize - if cwndLimit < limit { - limit = cwndLimit + // NOTE(gvisor.dev/issue/11632): Use uint64 to avoid overflow. + cwndLimit := uint64(s.SndCwnd-s.Outstanding) * uint64(s.MaxPayloadSize) + if cwndLimit < uint64(limit) { + limit = int(cwndLimit) } if s.isAssignedSequenceNumber(seg) && s.ep.SACKPermitted && s.ep.scoreboard.IsSACKED(seg.sackBlock()) { // Move writeNext along so that we don't try and scan data that @@ -1044,6 +1110,7 @@ func (s *sender) sendData() { s.postXmit(dataSent, true /* shouldScheduleProbe */) } +// +checklocks:s.ep.mu func (s *sender) enterRecovery() { // Initialize the variables used to detect spurious recovery after // entering recovery. 
@@ -1087,6 +1154,7 @@ func (s *sender) enterRecovery() { s.ep.stack.Stats().TCP.FastRecovery.Increment() } +// +checklocks:s.ep.mu func (s *sender) leaveRecovery() { s.FastRecovery.Active = false s.FastRecovery.MaxCwnd = 0 @@ -1109,6 +1177,8 @@ func (s *sender) isAssignedSequenceNumber(seg *segment) bool { // maintains the congestion window in number of packets and not bytes, so // SetPipe() here measures number of outstanding packets rather than actual // outstanding bytes in the network. +// +// +checklocks:s.ep.mu func (s *sender) SetPipe() { // If SACK isn't permitted or it is permitted but recovery is not active // then ignore pipe calculations. @@ -1162,6 +1232,8 @@ func (s *sender) SetPipe() { // shouldEnterRecovery returns true if the sender should enter fast recovery // based on dupAck count and sack scoreboard. // See RFC 6675 section 5. +// +// +checklocks:s.ep.mu func (s *sender) shouldEnterRecovery() bool { return s.DupAckCount >= nDupAckThreshold || (s.ep.SACKPermitted && s.ep.tcpRecovery&tcpip.TCPRACKLossDetection == 0 && s.ep.scoreboard.IsLost(s.SndUna)) @@ -1170,6 +1242,8 @@ func (s *sender) shouldEnterRecovery() bool { // detectLoss is called when an ack is received and returns whether a loss is // detected. It manages the state related to duplicate acks and determines if // a retransmit is needed according to the rules in RFC 6582 (NewReno). +// +// +checklocks:s.ep.mu func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) { // We're not in fast recovery yet. @@ -1220,6 +1294,8 @@ func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) { // isDupAck determines if seg is a duplicate ack as defined in // https://tools.ietf.org/html/rfc5681#section-2. 
+// +// +checklocks:s.ep.mu func (s *sender) isDupAck(seg *segment) bool { // A TCP that utilizes selective acknowledgments (SACKs) [RFC2018, RFC2883] // can leverage the SACK information to determine when an incoming ACK is a @@ -1250,6 +1326,8 @@ func (s *sender) isDupAck(seg *segment) bool { // // See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2 // steps 2 and 3. +// +// +checklocks:s.ep.mu func (s *sender) walkSACK(rcvdSeg *segment) bool { s.rc.setDSACKSeen(false) @@ -1367,6 +1445,7 @@ func (s *sender) recordRetransmitTS() { s.retransmitTS = s.ep.tsValNow() } +// +checklocks:s.ep.mu func (s *sender) detectSpuriousRecovery(hasDSACK bool, tsEchoReply uint32) { // Return if the sender has already detected spurious recovery. if s.spuriousRecovery { @@ -1431,7 +1510,6 @@ func (s *sender) inRecovery() bool { // handleRcvdSegment is called when a segment is received; it is responsible for // updating the send-related state. // +checklocks:s.ep.mu -// +checklocksalias:s.rc.snd.ep.mu=s.ep.mu func (s *sender) handleRcvdSegment(rcvdSeg *segment) { bestRTT := unknownRTT @@ -1572,7 +1650,8 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { // have no data, but do consume a sequence number. seg := s.writeList.Front() if seg == nil { - panic(fmt.Sprintf("invalid state: there are %d unacknowledged bytes left, but the write list is empty:\n%+v", ackLeft, s.TCPSenderState)) + panic(fmt.Sprintf("invalid state: there are %d unacknowledged bytes left, but the write list is empty:\n"+ + "TCPSenderState: %+v\nsender: %+v\nendpoint: %+v", ackLeft, s.TCPSenderState, s, s.ep)) } datalen := seg.logicalLen() @@ -1734,7 +1813,6 @@ func (s *sender) sendSegment(seg *segment) tcpip.Error { // sendSegmentFromPacketBuffer sends a new segment containing the given payload, // flags and sequence number. 
// +checklocks:s.ep.mu -// +checklocksalias:s.ep.rcv.ep.mu=s.ep.mu func (s *sender) sendSegmentFromPacketBuffer(pkt *stack.PacketBuffer, flags header.TCPFlags, seq seqnum.Value) tcpip.Error { s.LastSendTime = s.ep.stack.Clock().NowMonotonic() if seq == s.RTTMeasureSeqNum { @@ -1756,7 +1834,6 @@ func (s *sender) sendSegmentFromPacketBuffer(pkt *stack.PacketBuffer, flags head // sendEmptySegment sends a new empty segment, flags and sequence number. // +checklocks:s.ep.mu -// +checklocksalias:s.ep.rcv.ep.mu=s.ep.mu func (s *sender) sendEmptySegment(flags header.TCPFlags, seq seqnum.Value) tcpip.Error { s.LastSendTime = s.ep.stack.Clock().NowMonotonic() if seq == s.RTTMeasureSeqNum { diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/snd_queue_mutex.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/snd_queue_mutex.go new file mode 100644 index 00000000..a7e16317 --- /dev/null +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/snd_queue_mutex.go @@ -0,0 +1,64 @@ +package tcp + +import ( + "reflect" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/sync/locking" +) + +// Mutex is sync.Mutex with the correctness validator. +type sndQueueMutex struct { + mu sync.Mutex +} + +var sndQueueprefixIndex *locking.MutexClass + +// lockNames is a list of user-friendly lock names. +// Populated in init. +var sndQueuelockNames []string + +// lockNameIndex is used as an index passed to NestedLock and NestedUnlock, +// referring to an index within lockNames. +// Values are specified using the "consts" field of go_template_instance. +type sndQueuelockNameIndex int + +// DO NOT REMOVE: The following function automatically replaced with lock index constants. +// LOCK_NAME_INDEX_CONSTANTS +const () + +// Lock locks m. +// +checklocksignore +func (m *sndQueueMutex) Lock() { + locking.AddGLock(sndQueueprefixIndex, -1) + m.mu.Lock() +} + +// NestedLock locks m knowing that another lock of the same type is held. 
+// +checklocksignore +func (m *sndQueueMutex) NestedLock(i sndQueuelockNameIndex) { + locking.AddGLock(sndQueueprefixIndex, int(i)) + m.mu.Lock() +} + +// Unlock unlocks m. +// +checklocksignore +func (m *sndQueueMutex) Unlock() { + locking.DelGLock(sndQueueprefixIndex, -1) + m.mu.Unlock() +} + +// NestedUnlock unlocks m knowing that another lock of the same type is held. +// +checklocksignore +func (m *sndQueueMutex) NestedUnlock(i sndQueuelockNameIndex) { + locking.DelGLock(sndQueueprefixIndex, int(i)) + m.mu.Unlock() +} + +// DO NOT REMOVE: The following function is automatically replaced. +func sndQueueinitLockNames() {} + +func init() { + sndQueueinitLockNames() + sndQueueprefixIndex = locking.NewMutexClass(reflect.TypeFor[sndQueueMutex](), sndQueuelockNames) +} diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/tcp.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/state.go similarity index 97% rename from vendor/gvisor.dev/gvisor/pkg/tcpip/stack/tcp.go rename to vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/state.go index f5273405..d6ddd79c 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/stack/tcp.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/state.go @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -package stack +package tcp import ( - "context" "time" "gvisor.dev/gvisor/pkg/atomicbitops" @@ -25,19 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) -// contextID is this package's type for context.Context.Value keys. -type contextID int - -const ( - // CtxRestoreStack is a Context.Value key for the stack to be used in restore. - CtxRestoreStack contextID = iota -) - -// RestoreStackFromContext returns the stack to be used during restore. -func RestoreStackFromContext(ctx context.Context) *Stack { - return ctx.Value(CtxRestoreStack).(*Stack) -} - // TCPProbeFunc is the expected function type for a TCP probe function to be // passed to stack.AddTCPProbe. 
type TCPProbeFunc func(s *TCPEndpointState) diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/tcp_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/tcp_state_autogen.go index 7bfef39e..6d19c0e5 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/tcp_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/tcp/tcp_state_autogen.go @@ -25,8 +25,8 @@ func (a *acceptQueue) beforeSave() {} // +checklocksignore func (a *acceptQueue) StateSave(stateSinkObject state.Sink) { a.beforeSave() - var endpointsValue []*Endpoint - endpointsValue = a.saveEndpoints() + endpointsValue := a.saveEndpoints() + _ = ([]*Endpoint)(endpointsValue) stateSinkObject.SaveValue(0, endpointsValue) stateSinkObject.Save(1, &a.pendingEndpoints) stateSinkObject.Save(2, &a.capacity) @@ -177,7 +177,6 @@ func (p *processor) StateTypeName() string { func (p *processor) StateFields() []string { return []string{ "epQ", - "sleeper", } } @@ -187,7 +186,6 @@ func (p *processor) beforeSave() {} func (p *processor) StateSave(stateSinkObject state.Sink) { p.beforeSave() stateSinkObject.Save(0, &p.epQ) - stateSinkObject.Save(1, &p.sleeper) } func (p *processor) afterLoad(context.Context) {} @@ -195,7 +193,6 @@ func (p *processor) afterLoad(context.Context) {} // +checklocksignore func (p *processor) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(0, &p.epQ) - stateSourceObject.Load(1, &p.sleeper) } func (d *dispatcher) StateTypeName() string { @@ -445,6 +442,8 @@ func (e *Endpoint) StateFields() []string { "TCPEndpointStateInner", "TransportEndpointInfo", "DefaultSocketOptionsHandler", + "stack", + "protocol", "waiterQueue", "hardError", "lastError", @@ -454,6 +453,8 @@ func (e *Endpoint) StateFields() []string { "rcvQueue", "state", "connectionDirectionState", + "isPortReserved", + "isRegistered", "boundNICID", "ipv4TTL", "ipv6HopLimit", @@ -494,66 +495,74 @@ func (e *Endpoint) StateFields() []string { "ops", 
"lastOutOfWindowAckTime", "pmtud", + "alsoBindToV4", + "terminateAtRestore", } } // +checklocksignore func (e *Endpoint) StateSave(stateSinkObject state.Sink) { e.beforeSave() - var stateValue EndpointState - stateValue = e.saveState() - stateSinkObject.SaveValue(10, stateValue) + stateValue := e.saveState() + _ = (EndpointState)(stateValue) + stateSinkObject.SaveValue(12, stateValue) stateSinkObject.Save(0, &e.TCPEndpointStateInner) stateSinkObject.Save(1, &e.TransportEndpointInfo) stateSinkObject.Save(2, &e.DefaultSocketOptionsHandler) - stateSinkObject.Save(3, &e.waiterQueue) - stateSinkObject.Save(4, &e.hardError) - stateSinkObject.Save(5, &e.lastError) - stateSinkObject.Save(6, &e.TCPRcvBufState) - stateSinkObject.Save(7, &e.rcvMemUsed) - stateSinkObject.Save(8, &e.ownedByUser) - stateSinkObject.Save(9, &e.rcvQueue) - stateSinkObject.Save(11, &e.connectionDirectionState) - stateSinkObject.Save(12, &e.boundNICID) - stateSinkObject.Save(13, &e.ipv4TTL) - stateSinkObject.Save(14, &e.ipv6HopLimit) - stateSinkObject.Save(15, &e.isConnectNotified) - stateSinkObject.Save(16, &e.h) - stateSinkObject.Save(17, &e.portFlags) - stateSinkObject.Save(18, &e.boundBindToDevice) - stateSinkObject.Save(19, &e.boundPortFlags) - stateSinkObject.Save(20, &e.boundDest) - stateSinkObject.Save(21, &e.effectiveNetProtos) - stateSinkObject.Save(22, &e.recentTSTime) - stateSinkObject.Save(23, &e.shutdownFlags) - stateSinkObject.Save(24, &e.tcpRecovery) - stateSinkObject.Save(25, &e.sack) - stateSinkObject.Save(26, &e.delay) - stateSinkObject.Save(27, &e.scoreboard) - stateSinkObject.Save(28, &e.segmentQueue) - stateSinkObject.Save(29, &e.userMSS) - stateSinkObject.Save(30, &e.maxSynRetries) - stateSinkObject.Save(31, &e.windowClamp) - stateSinkObject.Save(32, &e.sndQueueInfo) - stateSinkObject.Save(33, &e.cc) - stateSinkObject.Save(34, &e.keepalive) - stateSinkObject.Save(35, &e.userTimeout) - stateSinkObject.Save(36, &e.deferAccept) - stateSinkObject.Save(37, &e.acceptQueue) - 
stateSinkObject.Save(38, &e.rcv) - stateSinkObject.Save(39, &e.snd) - stateSinkObject.Save(40, &e.connectingAddress) - stateSinkObject.Save(41, &e.amss) - stateSinkObject.Save(42, &e.sendTOS) - stateSinkObject.Save(43, &e.gso) - stateSinkObject.Save(44, &e.stats) - stateSinkObject.Save(45, &e.tcpLingerTimeout) - stateSinkObject.Save(46, &e.closed) - stateSinkObject.Save(47, &e.txHash) - stateSinkObject.Save(48, &e.owner) - stateSinkObject.Save(49, &e.ops) - stateSinkObject.Save(50, &e.lastOutOfWindowAckTime) - stateSinkObject.Save(51, &e.pmtud) + stateSinkObject.Save(3, &e.stack) + stateSinkObject.Save(4, &e.protocol) + stateSinkObject.Save(5, &e.waiterQueue) + stateSinkObject.Save(6, &e.hardError) + stateSinkObject.Save(7, &e.lastError) + stateSinkObject.Save(8, &e.TCPRcvBufState) + stateSinkObject.Save(9, &e.rcvMemUsed) + stateSinkObject.Save(10, &e.ownedByUser) + stateSinkObject.Save(11, &e.rcvQueue) + stateSinkObject.Save(13, &e.connectionDirectionState) + stateSinkObject.Save(14, &e.isPortReserved) + stateSinkObject.Save(15, &e.isRegistered) + stateSinkObject.Save(16, &e.boundNICID) + stateSinkObject.Save(17, &e.ipv4TTL) + stateSinkObject.Save(18, &e.ipv6HopLimit) + stateSinkObject.Save(19, &e.isConnectNotified) + stateSinkObject.Save(20, &e.h) + stateSinkObject.Save(21, &e.portFlags) + stateSinkObject.Save(22, &e.boundBindToDevice) + stateSinkObject.Save(23, &e.boundPortFlags) + stateSinkObject.Save(24, &e.boundDest) + stateSinkObject.Save(25, &e.effectiveNetProtos) + stateSinkObject.Save(26, &e.recentTSTime) + stateSinkObject.Save(27, &e.shutdownFlags) + stateSinkObject.Save(28, &e.tcpRecovery) + stateSinkObject.Save(29, &e.sack) + stateSinkObject.Save(30, &e.delay) + stateSinkObject.Save(31, &e.scoreboard) + stateSinkObject.Save(32, &e.segmentQueue) + stateSinkObject.Save(33, &e.userMSS) + stateSinkObject.Save(34, &e.maxSynRetries) + stateSinkObject.Save(35, &e.windowClamp) + stateSinkObject.Save(36, &e.sndQueueInfo) + stateSinkObject.Save(37, &e.cc) + 
stateSinkObject.Save(38, &e.keepalive) + stateSinkObject.Save(39, &e.userTimeout) + stateSinkObject.Save(40, &e.deferAccept) + stateSinkObject.Save(41, &e.acceptQueue) + stateSinkObject.Save(42, &e.rcv) + stateSinkObject.Save(43, &e.snd) + stateSinkObject.Save(44, &e.connectingAddress) + stateSinkObject.Save(45, &e.amss) + stateSinkObject.Save(46, &e.sendTOS) + stateSinkObject.Save(47, &e.gso) + stateSinkObject.Save(48, &e.stats) + stateSinkObject.Save(49, &e.tcpLingerTimeout) + stateSinkObject.Save(50, &e.closed) + stateSinkObject.Save(51, &e.txHash) + stateSinkObject.Save(52, &e.owner) + stateSinkObject.Save(53, &e.ops) + stateSinkObject.Save(54, &e.lastOutOfWindowAckTime) + stateSinkObject.Save(55, &e.pmtud) + stateSinkObject.Save(56, &e.alsoBindToV4) + stateSinkObject.Save(57, &e.terminateAtRestore) } // +checklocksignore @@ -561,55 +570,61 @@ func (e *Endpoint) StateLoad(ctx context.Context, stateSourceObject state.Source stateSourceObject.Load(0, &e.TCPEndpointStateInner) stateSourceObject.Load(1, &e.TransportEndpointInfo) stateSourceObject.Load(2, &e.DefaultSocketOptionsHandler) - stateSourceObject.LoadWait(3, &e.waiterQueue) - stateSourceObject.Load(4, &e.hardError) - stateSourceObject.Load(5, &e.lastError) - stateSourceObject.Load(6, &e.TCPRcvBufState) - stateSourceObject.Load(7, &e.rcvMemUsed) - stateSourceObject.Load(8, &e.ownedByUser) - stateSourceObject.LoadWait(9, &e.rcvQueue) - stateSourceObject.Load(11, &e.connectionDirectionState) - stateSourceObject.Load(12, &e.boundNICID) - stateSourceObject.Load(13, &e.ipv4TTL) - stateSourceObject.Load(14, &e.ipv6HopLimit) - stateSourceObject.Load(15, &e.isConnectNotified) - stateSourceObject.Load(16, &e.h) - stateSourceObject.Load(17, &e.portFlags) - stateSourceObject.Load(18, &e.boundBindToDevice) - stateSourceObject.Load(19, &e.boundPortFlags) - stateSourceObject.Load(20, &e.boundDest) - stateSourceObject.Load(21, &e.effectiveNetProtos) - stateSourceObject.Load(22, &e.recentTSTime) - 
stateSourceObject.Load(23, &e.shutdownFlags) - stateSourceObject.Load(24, &e.tcpRecovery) - stateSourceObject.Load(25, &e.sack) - stateSourceObject.Load(26, &e.delay) - stateSourceObject.Load(27, &e.scoreboard) - stateSourceObject.LoadWait(28, &e.segmentQueue) - stateSourceObject.Load(29, &e.userMSS) - stateSourceObject.Load(30, &e.maxSynRetries) - stateSourceObject.Load(31, &e.windowClamp) - stateSourceObject.Load(32, &e.sndQueueInfo) - stateSourceObject.Load(33, &e.cc) - stateSourceObject.Load(34, &e.keepalive) - stateSourceObject.Load(35, &e.userTimeout) - stateSourceObject.Load(36, &e.deferAccept) - stateSourceObject.Load(37, &e.acceptQueue) - stateSourceObject.LoadWait(38, &e.rcv) - stateSourceObject.LoadWait(39, &e.snd) - stateSourceObject.Load(40, &e.connectingAddress) - stateSourceObject.Load(41, &e.amss) - stateSourceObject.Load(42, &e.sendTOS) - stateSourceObject.Load(43, &e.gso) - stateSourceObject.Load(44, &e.stats) - stateSourceObject.Load(45, &e.tcpLingerTimeout) - stateSourceObject.Load(46, &e.closed) - stateSourceObject.Load(47, &e.txHash) - stateSourceObject.Load(48, &e.owner) - stateSourceObject.Load(49, &e.ops) - stateSourceObject.Load(50, &e.lastOutOfWindowAckTime) - stateSourceObject.Load(51, &e.pmtud) - stateSourceObject.LoadValue(10, new(EndpointState), func(y any) { e.loadState(ctx, y.(EndpointState)) }) + stateSourceObject.Load(3, &e.stack) + stateSourceObject.Load(4, &e.protocol) + stateSourceObject.LoadWait(5, &e.waiterQueue) + stateSourceObject.Load(6, &e.hardError) + stateSourceObject.Load(7, &e.lastError) + stateSourceObject.Load(8, &e.TCPRcvBufState) + stateSourceObject.Load(9, &e.rcvMemUsed) + stateSourceObject.Load(10, &e.ownedByUser) + stateSourceObject.LoadWait(11, &e.rcvQueue) + stateSourceObject.Load(13, &e.connectionDirectionState) + stateSourceObject.Load(14, &e.isPortReserved) + stateSourceObject.Load(15, &e.isRegistered) + stateSourceObject.Load(16, &e.boundNICID) + stateSourceObject.Load(17, &e.ipv4TTL) + 
stateSourceObject.Load(18, &e.ipv6HopLimit) + stateSourceObject.Load(19, &e.isConnectNotified) + stateSourceObject.Load(20, &e.h) + stateSourceObject.Load(21, &e.portFlags) + stateSourceObject.Load(22, &e.boundBindToDevice) + stateSourceObject.Load(23, &e.boundPortFlags) + stateSourceObject.Load(24, &e.boundDest) + stateSourceObject.Load(25, &e.effectiveNetProtos) + stateSourceObject.Load(26, &e.recentTSTime) + stateSourceObject.Load(27, &e.shutdownFlags) + stateSourceObject.Load(28, &e.tcpRecovery) + stateSourceObject.Load(29, &e.sack) + stateSourceObject.Load(30, &e.delay) + stateSourceObject.Load(31, &e.scoreboard) + stateSourceObject.LoadWait(32, &e.segmentQueue) + stateSourceObject.Load(33, &e.userMSS) + stateSourceObject.Load(34, &e.maxSynRetries) + stateSourceObject.Load(35, &e.windowClamp) + stateSourceObject.Load(36, &e.sndQueueInfo) + stateSourceObject.Load(37, &e.cc) + stateSourceObject.Load(38, &e.keepalive) + stateSourceObject.Load(39, &e.userTimeout) + stateSourceObject.Load(40, &e.deferAccept) + stateSourceObject.Load(41, &e.acceptQueue) + stateSourceObject.LoadWait(42, &e.rcv) + stateSourceObject.LoadWait(43, &e.snd) + stateSourceObject.Load(44, &e.connectingAddress) + stateSourceObject.Load(45, &e.amss) + stateSourceObject.Load(46, &e.sendTOS) + stateSourceObject.Load(47, &e.gso) + stateSourceObject.Load(48, &e.stats) + stateSourceObject.Load(49, &e.tcpLingerTimeout) + stateSourceObject.Load(50, &e.closed) + stateSourceObject.Load(51, &e.txHash) + stateSourceObject.Load(52, &e.owner) + stateSourceObject.Load(53, &e.ops) + stateSourceObject.Load(54, &e.lastOutOfWindowAckTime) + stateSourceObject.Load(55, &e.pmtud) + stateSourceObject.Load(56, &e.alsoBindToV4) + stateSourceObject.Load(57, &e.terminateAtRestore) + stateSourceObject.LoadValue(12, new(EndpointState), func(y any) { e.loadState(ctx, y.(EndpointState)) }) stateSourceObject.AfterLoad(func() { e.afterLoad(ctx) }) } @@ -952,8 +967,8 @@ func (s *segment) beforeSave() {} // +checklocksignore 
func (s *segment) StateSave(stateSinkObject state.Sink) { s.beforeSave() - var optionsValue []byte - optionsValue = s.saveOptions() + optionsValue := s.saveOptions() + _ = ([]byte)(optionsValue) stateSinkObject.SaveValue(12, optionsValue) stateSinkObject.Save(0, &s.segmentEntry) stateSinkObject.Save(1, &s.segmentRefs) @@ -1106,6 +1121,34 @@ func (s *sender) StateLoad(ctx context.Context, stateSourceObject state.Source) stateSourceObject.Load(16, &s.startCork) } +func (wl *protectedWriteList) StateTypeName() string { + return "pkg/tcpip/transport/tcp.protectedWriteList" +} + +func (wl *protectedWriteList) StateFields() []string { + return []string{ + "writeList", + "set", + } +} + +func (wl *protectedWriteList) beforeSave() {} + +// +checklocksignore +func (wl *protectedWriteList) StateSave(stateSinkObject state.Sink) { + wl.beforeSave() + stateSinkObject.Save(0, &wl.writeList) + stateSinkObject.Save(1, &wl.set) +} + +func (wl *protectedWriteList) afterLoad(context.Context) {} + +// +checklocksignore +func (wl *protectedWriteList) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &wl.writeList) + stateSourceObject.Load(1, &wl.set) +} + func (r *rtt) StateTypeName() string { return "pkg/tcpip/transport/tcp.rtt" } @@ -1131,6 +1174,586 @@ func (r *rtt) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(0, &r.TCPRTTState) } +func (t *TCPCubicState) StateTypeName() string { + return "pkg/tcpip/transport/tcp.TCPCubicState" +} + +func (t *TCPCubicState) StateFields() []string { + return []string{ + "WLastMax", + "WMax", + "T", + "TimeSinceLastCongestion", + "C", + "K", + "Beta", + "WC", + "WEst", + "EndSeq", + "CurrRTT", + "LastRTT", + "SampleCount", + "LastAck", + "RoundStart", + } +} + +func (t *TCPCubicState) beforeSave() {} + +// +checklocksignore +func (t *TCPCubicState) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.WLastMax) + 
stateSinkObject.Save(1, &t.WMax) + stateSinkObject.Save(2, &t.T) + stateSinkObject.Save(3, &t.TimeSinceLastCongestion) + stateSinkObject.Save(4, &t.C) + stateSinkObject.Save(5, &t.K) + stateSinkObject.Save(6, &t.Beta) + stateSinkObject.Save(7, &t.WC) + stateSinkObject.Save(8, &t.WEst) + stateSinkObject.Save(9, &t.EndSeq) + stateSinkObject.Save(10, &t.CurrRTT) + stateSinkObject.Save(11, &t.LastRTT) + stateSinkObject.Save(12, &t.SampleCount) + stateSinkObject.Save(13, &t.LastAck) + stateSinkObject.Save(14, &t.RoundStart) +} + +func (t *TCPCubicState) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TCPCubicState) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.WLastMax) + stateSourceObject.Load(1, &t.WMax) + stateSourceObject.Load(2, &t.T) + stateSourceObject.Load(3, &t.TimeSinceLastCongestion) + stateSourceObject.Load(4, &t.C) + stateSourceObject.Load(5, &t.K) + stateSourceObject.Load(6, &t.Beta) + stateSourceObject.Load(7, &t.WC) + stateSourceObject.Load(8, &t.WEst) + stateSourceObject.Load(9, &t.EndSeq) + stateSourceObject.Load(10, &t.CurrRTT) + stateSourceObject.Load(11, &t.LastRTT) + stateSourceObject.Load(12, &t.SampleCount) + stateSourceObject.Load(13, &t.LastAck) + stateSourceObject.Load(14, &t.RoundStart) +} + +func (t *TCPRACKState) StateTypeName() string { + return "pkg/tcpip/transport/tcp.TCPRACKState" +} + +func (t *TCPRACKState) StateFields() []string { + return []string{ + "XmitTime", + "EndSequence", + "FACK", + "RTT", + "Reord", + "DSACKSeen", + "ReoWnd", + "ReoWndIncr", + "ReoWndPersist", + "RTTSeq", + } +} + +func (t *TCPRACKState) beforeSave() {} + +// +checklocksignore +func (t *TCPRACKState) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.XmitTime) + stateSinkObject.Save(1, &t.EndSequence) + stateSinkObject.Save(2, &t.FACK) + stateSinkObject.Save(3, &t.RTT) + stateSinkObject.Save(4, &t.Reord) + stateSinkObject.Save(5, &t.DSACKSeen) + 
stateSinkObject.Save(6, &t.ReoWnd) + stateSinkObject.Save(7, &t.ReoWndIncr) + stateSinkObject.Save(8, &t.ReoWndPersist) + stateSinkObject.Save(9, &t.RTTSeq) +} + +func (t *TCPRACKState) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TCPRACKState) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.XmitTime) + stateSourceObject.Load(1, &t.EndSequence) + stateSourceObject.Load(2, &t.FACK) + stateSourceObject.Load(3, &t.RTT) + stateSourceObject.Load(4, &t.Reord) + stateSourceObject.Load(5, &t.DSACKSeen) + stateSourceObject.Load(6, &t.ReoWnd) + stateSourceObject.Load(7, &t.ReoWndIncr) + stateSourceObject.Load(8, &t.ReoWndPersist) + stateSourceObject.Load(9, &t.RTTSeq) +} + +func (t *TCPEndpointID) StateTypeName() string { + return "pkg/tcpip/transport/tcp.TCPEndpointID" +} + +func (t *TCPEndpointID) StateFields() []string { + return []string{ + "LocalPort", + "LocalAddress", + "RemotePort", + "RemoteAddress", + } +} + +func (t *TCPEndpointID) beforeSave() {} + +// +checklocksignore +func (t *TCPEndpointID) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.LocalPort) + stateSinkObject.Save(1, &t.LocalAddress) + stateSinkObject.Save(2, &t.RemotePort) + stateSinkObject.Save(3, &t.RemoteAddress) +} + +func (t *TCPEndpointID) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TCPEndpointID) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.LocalPort) + stateSourceObject.Load(1, &t.LocalAddress) + stateSourceObject.Load(2, &t.RemotePort) + stateSourceObject.Load(3, &t.RemoteAddress) +} + +func (t *TCPFastRecoveryState) StateTypeName() string { + return "pkg/tcpip/transport/tcp.TCPFastRecoveryState" +} + +func (t *TCPFastRecoveryState) StateFields() []string { + return []string{ + "Active", + "First", + "Last", + "MaxCwnd", + "HighRxt", + "RescueRxt", + } +} + +func (t *TCPFastRecoveryState) beforeSave() {} + +// 
+checklocksignore +func (t *TCPFastRecoveryState) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.Active) + stateSinkObject.Save(1, &t.First) + stateSinkObject.Save(2, &t.Last) + stateSinkObject.Save(3, &t.MaxCwnd) + stateSinkObject.Save(4, &t.HighRxt) + stateSinkObject.Save(5, &t.RescueRxt) +} + +func (t *TCPFastRecoveryState) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TCPFastRecoveryState) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.Active) + stateSourceObject.Load(1, &t.First) + stateSourceObject.Load(2, &t.Last) + stateSourceObject.Load(3, &t.MaxCwnd) + stateSourceObject.Load(4, &t.HighRxt) + stateSourceObject.Load(5, &t.RescueRxt) +} + +func (t *TCPReceiverState) StateTypeName() string { + return "pkg/tcpip/transport/tcp.TCPReceiverState" +} + +func (t *TCPReceiverState) StateFields() []string { + return []string{ + "RcvNxt", + "RcvAcc", + "RcvWndScale", + "PendingBufUsed", + } +} + +func (t *TCPReceiverState) beforeSave() {} + +// +checklocksignore +func (t *TCPReceiverState) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.RcvNxt) + stateSinkObject.Save(1, &t.RcvAcc) + stateSinkObject.Save(2, &t.RcvWndScale) + stateSinkObject.Save(3, &t.PendingBufUsed) +} + +func (t *TCPReceiverState) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TCPReceiverState) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.RcvNxt) + stateSourceObject.Load(1, &t.RcvAcc) + stateSourceObject.Load(2, &t.RcvWndScale) + stateSourceObject.Load(3, &t.PendingBufUsed) +} + +func (t *TCPRTTState) StateTypeName() string { + return "pkg/tcpip/transport/tcp.TCPRTTState" +} + +func (t *TCPRTTState) StateFields() []string { + return []string{ + "SRTT", + "RTTVar", + "SRTTInited", + } +} + +func (t *TCPRTTState) beforeSave() {} + +// +checklocksignore +func (t *TCPRTTState) 
StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.SRTT) + stateSinkObject.Save(1, &t.RTTVar) + stateSinkObject.Save(2, &t.SRTTInited) +} + +func (t *TCPRTTState) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TCPRTTState) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.SRTT) + stateSourceObject.Load(1, &t.RTTVar) + stateSourceObject.Load(2, &t.SRTTInited) +} + +func (t *TCPSenderState) StateTypeName() string { + return "pkg/tcpip/transport/tcp.TCPSenderState" +} + +func (t *TCPSenderState) StateFields() []string { + return []string{ + "LastSendTime", + "DupAckCount", + "SndCwnd", + "Ssthresh", + "SndCAAckCount", + "Outstanding", + "SackedOut", + "SndWnd", + "SndUna", + "SndNxt", + "RTTMeasureSeqNum", + "RTTMeasureTime", + "Closed", + "RTO", + "RTTState", + "MaxPayloadSize", + "SndWndScale", + "MaxSentAck", + "FastRecovery", + "Cubic", + "RACKState", + "RetransmitTS", + "SpuriousRecovery", + } +} + +func (t *TCPSenderState) beforeSave() {} + +// +checklocksignore +func (t *TCPSenderState) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.LastSendTime) + stateSinkObject.Save(1, &t.DupAckCount) + stateSinkObject.Save(2, &t.SndCwnd) + stateSinkObject.Save(3, &t.Ssthresh) + stateSinkObject.Save(4, &t.SndCAAckCount) + stateSinkObject.Save(5, &t.Outstanding) + stateSinkObject.Save(6, &t.SackedOut) + stateSinkObject.Save(7, &t.SndWnd) + stateSinkObject.Save(8, &t.SndUna) + stateSinkObject.Save(9, &t.SndNxt) + stateSinkObject.Save(10, &t.RTTMeasureSeqNum) + stateSinkObject.Save(11, &t.RTTMeasureTime) + stateSinkObject.Save(12, &t.Closed) + stateSinkObject.Save(13, &t.RTO) + stateSinkObject.Save(14, &t.RTTState) + stateSinkObject.Save(15, &t.MaxPayloadSize) + stateSinkObject.Save(16, &t.SndWndScale) + stateSinkObject.Save(17, &t.MaxSentAck) + stateSinkObject.Save(18, &t.FastRecovery) + stateSinkObject.Save(19, &t.Cubic) + 
stateSinkObject.Save(20, &t.RACKState) + stateSinkObject.Save(21, &t.RetransmitTS) + stateSinkObject.Save(22, &t.SpuriousRecovery) +} + +func (t *TCPSenderState) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TCPSenderState) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.LastSendTime) + stateSourceObject.Load(1, &t.DupAckCount) + stateSourceObject.Load(2, &t.SndCwnd) + stateSourceObject.Load(3, &t.Ssthresh) + stateSourceObject.Load(4, &t.SndCAAckCount) + stateSourceObject.Load(5, &t.Outstanding) + stateSourceObject.Load(6, &t.SackedOut) + stateSourceObject.Load(7, &t.SndWnd) + stateSourceObject.Load(8, &t.SndUna) + stateSourceObject.Load(9, &t.SndNxt) + stateSourceObject.Load(10, &t.RTTMeasureSeqNum) + stateSourceObject.Load(11, &t.RTTMeasureTime) + stateSourceObject.Load(12, &t.Closed) + stateSourceObject.Load(13, &t.RTO) + stateSourceObject.Load(14, &t.RTTState) + stateSourceObject.Load(15, &t.MaxPayloadSize) + stateSourceObject.Load(16, &t.SndWndScale) + stateSourceObject.Load(17, &t.MaxSentAck) + stateSourceObject.Load(18, &t.FastRecovery) + stateSourceObject.Load(19, &t.Cubic) + stateSourceObject.Load(20, &t.RACKState) + stateSourceObject.Load(21, &t.RetransmitTS) + stateSourceObject.Load(22, &t.SpuriousRecovery) +} + +func (t *TCPSACKInfo) StateTypeName() string { + return "pkg/tcpip/transport/tcp.TCPSACKInfo" +} + +func (t *TCPSACKInfo) StateFields() []string { + return []string{ + "Blocks", + "ReceivedBlocks", + "MaxSACKED", + } +} + +func (t *TCPSACKInfo) beforeSave() {} + +// +checklocksignore +func (t *TCPSACKInfo) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.Blocks) + stateSinkObject.Save(1, &t.ReceivedBlocks) + stateSinkObject.Save(2, &t.MaxSACKED) +} + +func (t *TCPSACKInfo) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TCPSACKInfo) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, 
&t.Blocks) + stateSourceObject.Load(1, &t.ReceivedBlocks) + stateSourceObject.Load(2, &t.MaxSACKED) +} + +func (r *RcvBufAutoTuneParams) StateTypeName() string { + return "pkg/tcpip/transport/tcp.RcvBufAutoTuneParams" +} + +func (r *RcvBufAutoTuneParams) StateFields() []string { + return []string{ + "MeasureTime", + "CopiedBytes", + "PrevCopiedBytes", + "RcvBufSize", + "RTT", + "RTTVar", + "RTTMeasureSeqNumber", + "RTTMeasureTime", + "Disabled", + } +} + +func (r *RcvBufAutoTuneParams) beforeSave() {} + +// +checklocksignore +func (r *RcvBufAutoTuneParams) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.MeasureTime) + stateSinkObject.Save(1, &r.CopiedBytes) + stateSinkObject.Save(2, &r.PrevCopiedBytes) + stateSinkObject.Save(3, &r.RcvBufSize) + stateSinkObject.Save(4, &r.RTT) + stateSinkObject.Save(5, &r.RTTVar) + stateSinkObject.Save(6, &r.RTTMeasureSeqNumber) + stateSinkObject.Save(7, &r.RTTMeasureTime) + stateSinkObject.Save(8, &r.Disabled) +} + +func (r *RcvBufAutoTuneParams) afterLoad(context.Context) {} + +// +checklocksignore +func (r *RcvBufAutoTuneParams) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.MeasureTime) + stateSourceObject.Load(1, &r.CopiedBytes) + stateSourceObject.Load(2, &r.PrevCopiedBytes) + stateSourceObject.Load(3, &r.RcvBufSize) + stateSourceObject.Load(4, &r.RTT) + stateSourceObject.Load(5, &r.RTTVar) + stateSourceObject.Load(6, &r.RTTMeasureSeqNumber) + stateSourceObject.Load(7, &r.RTTMeasureTime) + stateSourceObject.Load(8, &r.Disabled) +} + +func (t *TCPRcvBufState) StateTypeName() string { + return "pkg/tcpip/transport/tcp.TCPRcvBufState" +} + +func (t *TCPRcvBufState) StateFields() []string { + return []string{ + "RcvBufUsed", + "RcvAutoParams", + "RcvClosed", + } +} + +func (t *TCPRcvBufState) beforeSave() {} + +// +checklocksignore +func (t *TCPRcvBufState) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, 
&t.RcvBufUsed) + stateSinkObject.Save(1, &t.RcvAutoParams) + stateSinkObject.Save(2, &t.RcvClosed) +} + +func (t *TCPRcvBufState) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TCPRcvBufState) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.RcvBufUsed) + stateSourceObject.Load(1, &t.RcvAutoParams) + stateSourceObject.Load(2, &t.RcvClosed) +} + +func (t *TCPSndBufState) StateTypeName() string { + return "pkg/tcpip/transport/tcp.TCPSndBufState" +} + +func (t *TCPSndBufState) StateFields() []string { + return []string{ + "SndBufSize", + "SndBufUsed", + "SndClosed", + "PacketTooBigCount", + "SndMTU", + "AutoTuneSndBufDisabled", + } +} + +func (t *TCPSndBufState) beforeSave() {} + +// +checklocksignore +func (t *TCPSndBufState) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.SndBufSize) + stateSinkObject.Save(1, &t.SndBufUsed) + stateSinkObject.Save(2, &t.SndClosed) + stateSinkObject.Save(3, &t.PacketTooBigCount) + stateSinkObject.Save(4, &t.SndMTU) + stateSinkObject.Save(5, &t.AutoTuneSndBufDisabled) +} + +func (t *TCPSndBufState) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TCPSndBufState) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.SndBufSize) + stateSourceObject.Load(1, &t.SndBufUsed) + stateSourceObject.Load(2, &t.SndClosed) + stateSourceObject.Load(3, &t.PacketTooBigCount) + stateSourceObject.Load(4, &t.SndMTU) + stateSourceObject.Load(5, &t.AutoTuneSndBufDisabled) +} + +func (t *TCPEndpointStateInner) StateTypeName() string { + return "pkg/tcpip/transport/tcp.TCPEndpointStateInner" +} + +func (t *TCPEndpointStateInner) StateFields() []string { + return []string{ + "TSOffset", + "SACKPermitted", + "SendTSOk", + "RecentTS", + } +} + +func (t *TCPEndpointStateInner) beforeSave() {} + +// +checklocksignore +func (t *TCPEndpointStateInner) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + 
stateSinkObject.Save(0, &t.TSOffset) + stateSinkObject.Save(1, &t.SACKPermitted) + stateSinkObject.Save(2, &t.SendTSOk) + stateSinkObject.Save(3, &t.RecentTS) +} + +func (t *TCPEndpointStateInner) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TCPEndpointStateInner) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.TSOffset) + stateSourceObject.Load(1, &t.SACKPermitted) + stateSourceObject.Load(2, &t.SendTSOk) + stateSourceObject.Load(3, &t.RecentTS) +} + +func (t *TCPEndpointState) StateTypeName() string { + return "pkg/tcpip/transport/tcp.TCPEndpointState" +} + +func (t *TCPEndpointState) StateFields() []string { + return []string{ + "TCPEndpointStateInner", + "ID", + "SegTime", + "RcvBufState", + "SndBufState", + "SACK", + "Receiver", + "Sender", + } +} + +func (t *TCPEndpointState) beforeSave() {} + +// +checklocksignore +func (t *TCPEndpointState) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.TCPEndpointStateInner) + stateSinkObject.Save(1, &t.ID) + stateSinkObject.Save(2, &t.SegTime) + stateSinkObject.Save(3, &t.RcvBufState) + stateSinkObject.Save(4, &t.SndBufState) + stateSinkObject.Save(5, &t.SACK) + stateSinkObject.Save(6, &t.Receiver) + stateSinkObject.Save(7, &t.Sender) +} + +func (t *TCPEndpointState) afterLoad(context.Context) {} + +// +checklocksignore +func (t *TCPEndpointState) StateLoad(ctx context.Context, stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.TCPEndpointStateInner) + stateSourceObject.Load(1, &t.ID) + stateSourceObject.Load(2, &t.SegTime) + stateSourceObject.Load(3, &t.RcvBufState) + stateSourceObject.Load(4, &t.SndBufState) + stateSourceObject.Load(5, &t.SACK) + stateSourceObject.Load(6, &t.Receiver) + stateSourceObject.Load(7, &t.Sender) +} + func (l *endpointList) StateTypeName() string { return "pkg/tcpip/transport/tcp.endpointList" } @@ -1292,7 +1915,21 @@ func init() { state.Register((*segment)(nil)) 
state.Register((*segmentQueue)(nil)) state.Register((*sender)(nil)) + state.Register((*protectedWriteList)(nil)) state.Register((*rtt)(nil)) + state.Register((*TCPCubicState)(nil)) + state.Register((*TCPRACKState)(nil)) + state.Register((*TCPEndpointID)(nil)) + state.Register((*TCPFastRecoveryState)(nil)) + state.Register((*TCPReceiverState)(nil)) + state.Register((*TCPRTTState)(nil)) + state.Register((*TCPSenderState)(nil)) + state.Register((*TCPSACKInfo)(nil)) + state.Register((*RcvBufAutoTuneParams)(nil)) + state.Register((*TCPRcvBufState)(nil)) + state.Register((*TCPSndBufState)(nil)) + state.Register((*TCPEndpointStateInner)(nil)) + state.Register((*TCPEndpointState)(nil)) state.Register((*endpointList)(nil)) state.Register((*endpointEntry)(nil)) state.Register((*segmentList)(nil)) diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/endpoint.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/endpoint.go index f8e30579..bd9dbbf0 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/endpoint.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/endpoint.go @@ -61,7 +61,7 @@ type endpoint struct { // The following fields are initialized at creation time and do not // change throughout the lifetime of the endpoint. - stack *stack.Stack `state:"manual"` + stack *stack.Stack waiterQueue *waiter.Queue net network.Endpoint stats tcpip.TransportEndpointStats @@ -160,11 +160,16 @@ func (e *endpoint) Abort() { // associated with it. func (e *endpoint) Close() { e.mu.Lock() + defer e.mu.Unlock() + e.closeLocked() +} +// Preconditions: e.mu is locked. 
+// +checklocks:e.mu +func (e *endpoint) closeLocked() { switch state := e.net.State(); state { case transport.DatagramEndpointStateInitial: case transport.DatagramEndpointStateClosed: - e.mu.Unlock() return case transport.DatagramEndpointStateBound, transport.DatagramEndpointStateConnected: id := e.net.Info().ID @@ -201,7 +206,6 @@ func (e *endpoint) Close() { e.net.Shutdown() e.net.Close() e.readShutdown = true - e.mu.Unlock() e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents) } @@ -290,6 +294,10 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult if opts.NeedRemoteAddr { res.RemoteAddr = p.senderAddress } + if opts.NeedReceivedExperimentOption { + expOptVal, _ := p.pkt.ExperimentOptionValue() + res.ReceivedExperimentOption = expOptVal + } n, err := p.pkt.Data().ReadTo(dst, opts.Peek) if n == 0 && err != nil { @@ -459,9 +467,9 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp dataSz := p.Len() pktInfo := udpInfo.ctx.PacketInfo() - pkt := udpInfo.ctx.TryNewPacketBufferFromPayloader(header.UDPMinimumSize+int(pktInfo.MaxHeaderLength), p) - if pkt == nil { - return 0, &tcpip.ErrWouldBlock{} + pkt, err := udpInfo.ctx.TryNewPacketBufferFromPayloader(header.UDPMinimumSize+int(pktInfo.MaxHeaderLength), p) + if err != nil { + return 0, err } defer pkt.DecRef() @@ -952,7 +960,9 @@ func (e *endpoint) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketB Addr: id.LocalAddress, Port: hdr.DestinationPort(), }, - pkt: pkt.IncRef(), + // We need to clone the packet because ReadTo modifies the write index of + // the underlying buffer. Clone does not copy the data, just the metadata. 
+ pkt: pkt.Clone(), } e.rcvList.PushBack(packet) e.rcvBufSize += pkt.Data().Size() diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/endpoint_state.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/endpoint_state.go index 488e4660..ddfc2e85 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/endpoint_state.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/endpoint_state.go @@ -16,9 +16,9 @@ package udp import ( "context" - "fmt" "time" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/tcpip/transport" @@ -36,7 +36,11 @@ func (p *udpPacket) loadReceivedAt(_ context.Context, nsec int64) { // afterLoad is invoked by stateify. func (e *endpoint) afterLoad(ctx context.Context) { - stack.RestoreStackFromContext(ctx).RegisterRestoredEndpoint(e) + if e.stack.IsSaveRestoreEnabled() { + e.stack.RegisterRestoredEndpoint(e) + } else { + stack.RestoreStackFromContext(ctx).RegisterRestoredEndpoint(e) + } } // beforeSave is invoked by stateify. @@ -47,13 +51,21 @@ func (e *endpoint) beforeSave() { // Restore implements tcpip.RestoredEndpoint.Restore. func (e *endpoint) Restore(s *stack.Stack) { - e.thaw() - e.mu.Lock() defer e.mu.Unlock() - e.net.Resume(s) + if err := e.net.Resume(s); err != nil { + log.Warningf("Closing the UDP endpoint as it cannot be restored, err: %v", err) + e.closeLocked() + return + } + // Unfreeze the endpoint to handle packets. 
+ e.frozen = false + if e.stack.IsSaveRestoreEnabled() { + e.ops.InitHandler(e, e.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits) + return + } e.stack = s e.ops.InitHandler(e, e.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits) @@ -69,12 +81,12 @@ func (e *endpoint) Restore(s *stack.Stack) { id.RemotePort = e.remotePort id, e.boundBindToDevice, err = e.registerWithStack(e.effectiveNetProtos, id) if err != nil { - panic(err) + panic("registering udp endpoint with the stack failed during restore") } e.localPort = id.LocalPort e.remotePort = id.RemotePort default: - panic(fmt.Sprintf("unhandled state = %s", state)) + panic("unhandled state") } } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/forwarder.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/forwarder.go index 7950abe5..6e2b22e1 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/forwarder.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/forwarder.go @@ -20,19 +20,24 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// ForwarderHandler handles incoming requests. Returning true marks the +// request as handled, returning false marks the request as unhandled. +// Stack may send an ICMP port unreachable message for unhandled requests. +type ForwarderHandler func(*ForwarderRequest) (handled bool) + // Forwarder is a session request forwarder, which allows clients to decide // what to do with a session request, for example: ignore it, or process it. // // The canonical way of using it is to pass the Forwarder.HandlePacket function // to stack.SetTransportProtocolHandler. type Forwarder struct { - handler func(*ForwarderRequest) + handler ForwarderHandler stack *stack.Stack } // NewForwarder allocates and initializes a new forwarder. 
-func NewForwarder(s *stack.Stack, handler func(*ForwarderRequest)) *Forwarder { +func NewForwarder(s *stack.Stack, handler ForwarderHandler) *Forwarder { return &Forwarder{ stack: s, handler: handler, @@ -44,13 +49,11 @@ func NewForwarder(s *stack.Stack, handler func(*ForwarderRequest)) *Forwarder { // This function is expected to be passed as an argument to the // stack.SetTransportProtocolHandler function. func (f *Forwarder) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool { - f.handler(&ForwarderRequest{ + return f.handler(&ForwarderRequest{ stack: f.stack, id: id, - pkt: pkt.IncRef(), + pkt: pkt.Clone(), }) - - return true } // ForwarderRequest represents a session request received by the forwarder and @@ -62,6 +65,15 @@ type ForwarderRequest struct { pkt *stack.PacketBuffer } +// NewForwarderRequest creates a new ForwarderRequest. +func NewForwarderRequest(stack *stack.Stack, id stack.TransportEndpointID, pkt *stack.PacketBuffer) *ForwarderRequest { + return &ForwarderRequest{ + stack: stack, + id: id, + pkt: pkt, + } +} + // ID returns the 4-tuple (src address, src port, dst address, dst port) that // represents the session request. 
func (r *ForwarderRequest) ID() stack.TransportEndpointID { @@ -76,15 +88,17 @@ func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, netHdr := r.pkt.Network() if err := ep.net.Bind(tcpip.FullAddress{NIC: r.pkt.NICID, Addr: netHdr.DestinationAddress(), Port: r.id.LocalPort}); err != nil { + ep.closeLocked() return nil, err } if err := ep.net.Connect(tcpip.FullAddress{NIC: r.pkt.NICID, Addr: netHdr.SourceAddress(), Port: r.id.RemotePort}); err != nil { + ep.closeLocked() return nil, err } if err := r.stack.RegisterTransportEndpoint([]tcpip.NetworkProtocolNumber{r.pkt.NetworkProtocolNumber}, ProtocolNumber, r.id, ep, ep.portFlags, tcpip.NICID(ep.ops.GetBindToDevice())); err != nil { - ep.Close() + ep.closeLocked() return nil, err } diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/protocol.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/protocol.go index 49870ab8..0f52b13f 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/protocol.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/protocol.go @@ -124,6 +124,9 @@ func (*protocol) Pause() {} // Resume implements stack.TransportProtocol.Resume. func (*protocol) Resume() {} +// Restore implements stack.TransportProtocol.Restore. +func (*protocol) Restore() {} + // Parse implements stack.TransportProtocol.Parse. 
func (*protocol) Parse(pkt *stack.PacketBuffer) bool { return parse.UDP(pkt) diff --git a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/udp_state_autogen.go b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/udp_state_autogen.go index e10d21cd..3194d8f3 100644 --- a/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/udp_state_autogen.go +++ b/vendor/gvisor.dev/gvisor/pkg/tcpip/transport/udp/udp_state_autogen.go @@ -31,8 +31,8 @@ func (p *udpPacket) beforeSave() {} // +checklocksignore func (p *udpPacket) StateSave(stateSinkObject state.Sink) { p.beforeSave() - var receivedAtValue int64 - receivedAtValue = p.saveReceivedAt() + receivedAtValue := p.saveReceivedAt() + _ = (int64)(receivedAtValue) stateSinkObject.SaveValue(6, receivedAtValue) stateSinkObject.Save(0, &p.udpPacketEntry) stateSinkObject.Save(1, &p.netProto) @@ -66,6 +66,7 @@ func (e *endpoint) StateTypeName() string { func (e *endpoint) StateFields() []string { return []string{ "DefaultSocketOptionsHandler", + "stack", "waiterQueue", "net", "stats", @@ -90,45 +91,47 @@ func (e *endpoint) StateFields() []string { func (e *endpoint) StateSave(stateSinkObject state.Sink) { e.beforeSave() stateSinkObject.Save(0, &e.DefaultSocketOptionsHandler) - stateSinkObject.Save(1, &e.waiterQueue) - stateSinkObject.Save(2, &e.net) - stateSinkObject.Save(3, &e.stats) - stateSinkObject.Save(4, &e.ops) - stateSinkObject.Save(5, &e.rcvReady) - stateSinkObject.Save(6, &e.rcvList) - stateSinkObject.Save(7, &e.rcvBufSize) - stateSinkObject.Save(8, &e.rcvClosed) - stateSinkObject.Save(9, &e.lastError) - stateSinkObject.Save(10, &e.portFlags) - stateSinkObject.Save(11, &e.boundBindToDevice) - stateSinkObject.Save(12, &e.boundPortFlags) - stateSinkObject.Save(13, &e.readShutdown) - stateSinkObject.Save(14, &e.effectiveNetProtos) - stateSinkObject.Save(15, &e.frozen) - stateSinkObject.Save(16, &e.localPort) - stateSinkObject.Save(17, &e.remotePort) + stateSinkObject.Save(1, &e.stack) + stateSinkObject.Save(2, &e.waiterQueue) + 
stateSinkObject.Save(3, &e.net) + stateSinkObject.Save(4, &e.stats) + stateSinkObject.Save(5, &e.ops) + stateSinkObject.Save(6, &e.rcvReady) + stateSinkObject.Save(7, &e.rcvList) + stateSinkObject.Save(8, &e.rcvBufSize) + stateSinkObject.Save(9, &e.rcvClosed) + stateSinkObject.Save(10, &e.lastError) + stateSinkObject.Save(11, &e.portFlags) + stateSinkObject.Save(12, &e.boundBindToDevice) + stateSinkObject.Save(13, &e.boundPortFlags) + stateSinkObject.Save(14, &e.readShutdown) + stateSinkObject.Save(15, &e.effectiveNetProtos) + stateSinkObject.Save(16, &e.frozen) + stateSinkObject.Save(17, &e.localPort) + stateSinkObject.Save(18, &e.remotePort) } // +checklocksignore func (e *endpoint) StateLoad(ctx context.Context, stateSourceObject state.Source) { stateSourceObject.Load(0, &e.DefaultSocketOptionsHandler) - stateSourceObject.Load(1, &e.waiterQueue) - stateSourceObject.Load(2, &e.net) - stateSourceObject.Load(3, &e.stats) - stateSourceObject.Load(4, &e.ops) - stateSourceObject.Load(5, &e.rcvReady) - stateSourceObject.Load(6, &e.rcvList) - stateSourceObject.Load(7, &e.rcvBufSize) - stateSourceObject.Load(8, &e.rcvClosed) - stateSourceObject.Load(9, &e.lastError) - stateSourceObject.Load(10, &e.portFlags) - stateSourceObject.Load(11, &e.boundBindToDevice) - stateSourceObject.Load(12, &e.boundPortFlags) - stateSourceObject.Load(13, &e.readShutdown) - stateSourceObject.Load(14, &e.effectiveNetProtos) - stateSourceObject.Load(15, &e.frozen) - stateSourceObject.Load(16, &e.localPort) - stateSourceObject.Load(17, &e.remotePort) + stateSourceObject.Load(1, &e.stack) + stateSourceObject.Load(2, &e.waiterQueue) + stateSourceObject.Load(3, &e.net) + stateSourceObject.Load(4, &e.stats) + stateSourceObject.Load(5, &e.ops) + stateSourceObject.Load(6, &e.rcvReady) + stateSourceObject.Load(7, &e.rcvList) + stateSourceObject.Load(8, &e.rcvBufSize) + stateSourceObject.Load(9, &e.rcvClosed) + stateSourceObject.Load(10, &e.lastError) + stateSourceObject.Load(11, &e.portFlags) + 
stateSourceObject.Load(12, &e.boundBindToDevice) + stateSourceObject.Load(13, &e.boundPortFlags) + stateSourceObject.Load(14, &e.readShutdown) + stateSourceObject.Load(15, &e.effectiveNetProtos) + stateSourceObject.Load(16, &e.frozen) + stateSourceObject.Load(17, &e.localPort) + stateSourceObject.Load(18, &e.remotePort) stateSourceObject.AfterLoad(func() { e.afterLoad(ctx) }) } diff --git a/vendor/gvisor.dev/gvisor/pkg/waiter/waiter.go b/vendor/gvisor.dev/gvisor/pkg/waiter/waiter.go index 1b47ae1b..785992fc 100644 --- a/vendor/gvisor.dev/gvisor/pkg/waiter/waiter.go +++ b/vendor/gvisor.dev/gvisor/pkg/waiter/waiter.go @@ -154,6 +154,15 @@ func (e *Entry) Init(eventListener EventListener, mask EventMask) { e.mask = mask } +// SetQueuedMask changes the entry mask. +// +// Preconditions: The Entry must be registered to the given Queue. +func (e *Entry) SetQueuedMask(q *Queue, mask EventMask) { + q.mu.Lock() + e.mask = mask + q.mu.Unlock() +} + // Mask returns the entry mask. func (e *Entry) Mask() EventMask { return e.mask @@ -201,6 +210,12 @@ func NewFunctionEntry(mask EventMask, fn func(EventMask)) (e Entry) { return e } +// NoopListener is an EventListener that does nothing. +type NoopListener struct{} + +// NotifyEvent implements EventListener.NotifyEvent. +func (NoopListener) NotifyEvent(mask EventMask) {} + // Queue represents the wait queue where waiters can be added and // notifiers can notify them when events happen. // @@ -259,45 +274,23 @@ func (q *Queue) IsEmpty() bool { return q.list.Front() == nil } -// AlwaysReady implements the Waitable interface but is always ready. Embedding -// this struct into another struct makes it implement the boilerplate empty -// functions automatically. -type AlwaysReady struct { -} - -// Readiness always returns the input mask because this object is always ready. 
-func (*AlwaysReady) Readiness(mask EventMask) EventMask { - return mask -} - -// EventRegister doesn't do anything because this object doesn't need to issue -// notifications because its readiness never changes. -func (*AlwaysReady) EventRegister(*Entry) error { - return nil -} - -// EventUnregister doesn't do anything because this object doesn't need to issue -// notifications because its readiness never changes. -func (*AlwaysReady) EventUnregister(e *Entry) { -} - // NeverReady implements the Waitable interface but is never ready. Otherwise, // this is exactly the same as AlwaysReady. type NeverReady struct { } -// Readiness always returns the input mask because this object is always ready. -func (*NeverReady) Readiness(mask EventMask) EventMask { - return mask +// Readiness always returns 0 because this object is never ready. +func (*NeverReady) Readiness(EventMask) EventMask { + return 0 } // EventRegister doesn't do anything because this object doesn't need to issue // notifications because its readiness never changes. -func (*NeverReady) EventRegister(e *Entry) error { +func (*NeverReady) EventRegister(*Entry) error { return nil } // EventUnregister doesn't do anything because this object doesn't need to issue // notifications because its readiness never changes. 
-func (*NeverReady) EventUnregister(e *Entry) { +func (*NeverReady) EventUnregister(*Entry) { } diff --git a/vendor/modules.txt b/vendor/modules.txt index 59048cef..7bc981f0 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -163,6 +163,9 @@ golang.org/x/crypto/internal/poly1305 golang.org/x/crypto/ssh golang.org/x/crypto/ssh/internal/bcrypt_pbkdf golang.org/x/crypto/ssh/knownhosts +# golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa +## explicit; go 1.20 +golang.org/x/exp/constraints # golang.org/x/mod v0.35.0 ## explicit; go 1.25.0 golang.org/x/mod/semver @@ -205,8 +208,8 @@ golang.org/x/text/internal/utf8internal golang.org/x/text/language golang.org/x/text/runes golang.org/x/text/transform -# golang.org/x/time v0.5.0 -## explicit; go 1.18 +# golang.org/x/time v0.12.0 +## explicit; go 1.23.0 golang.org/x/time/rate # golang.org/x/tools v0.43.0 ## explicit; go 1.25.0 @@ -235,8 +238,8 @@ gopkg.in/tomb.v1 # gopkg.in/yaml.v3 v3.0.1 ## explicit gopkg.in/yaml.v3 -# gvisor.dev/gvisor v0.0.0-20240916094835-a174eb65023f -## explicit; go 1.22.0 +# gvisor.dev/gvisor v0.0.0-20260413194555-9680d69bf798 +## explicit; go 1.25.5 gvisor.dev/gvisor/pkg/atomicbitops gvisor.dev/gvisor/pkg/bits gvisor.dev/gvisor/pkg/buffer From 6f646c44e51bdb2fd1aad20463ef6c98012f5854 Mon Sep 17 00:00:00 2001 From: Yevhen Vydolob Date: Thu, 15 Jan 2026 09:29:57 +0200 Subject: [PATCH 3/7] Update gitignore Add .vscode and .idea ide/editor folders and few files created during test Signed-off-by: Yevhen Vydolob --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 385a2475..61990b86 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,7 @@ bin/ capture.pcap tmp/ test/qcon.log +.idea/ +.vscode/ +test-qemu/qcon.log +test-qemu/ovmf_vars.fd From cf0e470794a0ed364ca4f7db33f5d9ff6ead6bc7 Mon Sep 17 00:00:00 2001 From: Yevhen Vydolob Date: Thu, 15 Jan 2026 09:45:30 +0200 Subject: [PATCH 4/7] Add ICMP forwarding support The current implementation supports 
forwarding only ICMP Echo (ping) packets, as the OS does not allow sending arbitrary packets without escalating privileges. All other ICMP packets are dropped (ignored). Changes in 'icmp_packet.go' and 'icmp.go' were mostly made by Cursor. Signed-off-by: Yevhen Vydolob --- go.mod | 2 +- pkg/services/forwarder/icmp.go | 73 ++++ pkg/services/forwarder/icmp_forwarder.go | 40 +++ pkg/services/forwarder/icmp_packet.go | 253 ++++++++++++++ pkg/services/forwarder/icmp_packet_unix.go | 42 +++ pkg/services/forwarder/icmp_packet_windows.go | 62 ++++ pkg/virtualnetwork/services.go | 3 + test-vfkit/basic_test.go | 5 + vendor/golang.org/x/net/icmp/dstunreach.go | 59 ++++ vendor/golang.org/x/net/icmp/echo.go | 173 ++++++++++ vendor/golang.org/x/net/icmp/endpoint.go | 113 ++++++ vendor/golang.org/x/net/icmp/extension.go | 170 +++++++++ vendor/golang.org/x/net/icmp/helper_posix.go | 75 ++++ vendor/golang.org/x/net/icmp/interface.go | 322 ++++++++++++++++++ vendor/golang.org/x/net/icmp/ipv4.go | 68 ++++ vendor/golang.org/x/net/icmp/ipv6.go | 23 ++ vendor/golang.org/x/net/icmp/listen_posix.go | 105 ++++++ vendor/golang.org/x/net/icmp/listen_stub.go | 35 ++ vendor/golang.org/x/net/icmp/message.go | 162 +++++++++ vendor/golang.org/x/net/icmp/messagebody.go | 52 +++ vendor/golang.org/x/net/icmp/mpls.go | 77 +++++ vendor/golang.org/x/net/icmp/multipart.go | 129 +++++++ vendor/golang.org/x/net/icmp/packettoobig.go | 43 +++ vendor/golang.org/x/net/icmp/paramprob.go | 72 ++++ vendor/golang.org/x/net/icmp/sys_freebsd.go | 11 + vendor/golang.org/x/net/icmp/timeexceeded.go | 57 ++++ vendor/modules.txt | 1 + 27 files changed, 2226 insertions(+), 1 deletion(-) create mode 100644 pkg/services/forwarder/icmp.go create mode 100644 pkg/services/forwarder/icmp_forwarder.go create mode 100644 pkg/services/forwarder/icmp_packet.go create mode 100644 pkg/services/forwarder/icmp_packet_unix.go create mode 100644 pkg/services/forwarder/icmp_packet_windows.go create mode 100644 
vendor/golang.org/x/net/icmp/dstunreach.go create mode 100644 vendor/golang.org/x/net/icmp/echo.go create mode 100644 vendor/golang.org/x/net/icmp/endpoint.go create mode 100644 vendor/golang.org/x/net/icmp/extension.go create mode 100644 vendor/golang.org/x/net/icmp/helper_posix.go create mode 100644 vendor/golang.org/x/net/icmp/interface.go create mode 100644 vendor/golang.org/x/net/icmp/ipv4.go create mode 100644 vendor/golang.org/x/net/icmp/ipv6.go create mode 100644 vendor/golang.org/x/net/icmp/listen_posix.go create mode 100644 vendor/golang.org/x/net/icmp/listen_stub.go create mode 100644 vendor/golang.org/x/net/icmp/message.go create mode 100644 vendor/golang.org/x/net/icmp/messagebody.go create mode 100644 vendor/golang.org/x/net/icmp/mpls.go create mode 100644 vendor/golang.org/x/net/icmp/multipart.go create mode 100644 vendor/golang.org/x/net/icmp/packettoobig.go create mode 100644 vendor/golang.org/x/net/icmp/paramprob.go create mode 100644 vendor/golang.org/x/net/icmp/sys_freebsd.go create mode 100644 vendor/golang.org/x/net/icmp/timeexceeded.go diff --git a/go.mod b/go.mod index ba33f6ac..070a3f7c 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/vishvananda/netlink v1.3.1 golang.org/x/crypto v0.50.0 golang.org/x/mod v0.35.0 + golang.org/x/net v0.52.0 golang.org/x/sync v0.20.0 golang.org/x/sys v0.43.0 gopkg.in/yaml.v3 v3.0.1 @@ -45,7 +46,6 @@ require ( github.com/vishvananda/netns v0.0.5 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa // indirect - golang.org/x/net v0.52.0 // indirect golang.org/x/text v0.36.0 // indirect golang.org/x/time v0.12.0 // indirect golang.org/x/tools v0.43.0 // indirect diff --git a/pkg/services/forwarder/icmp.go b/pkg/services/forwarder/icmp.go new file mode 100644 index 00000000..fbf52f10 --- /dev/null +++ b/pkg/services/forwarder/icmp.go @@ -0,0 +1,73 @@ +package forwarder + +import ( + "sync" + + log "github.com/sirupsen/logrus" + 
"gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" +) + +type Forwarder struct { + handler func(request *ICMPForwarderRequest) + stack *stack.Stack +} + +func ICMP(s *stack.Stack, nat map[tcpip.Address]tcpip.Address, natLock *sync.Mutex) *Forwarder { + return NewForwarder(s, func(r *ICMPForwarderRequest) { + localAddress := r.ID().LocalAddress + + // Skip forwarding for addresses that should be handled locally + if header.IsV4LoopbackAddress(localAddress) || localAddress == header.IPv4Broadcast { + return + } + + // Apply NAT translation if needed + natLock.Lock() + if replaced, ok := nat[localAddress]; ok { + localAddress = replaced + } + natLock.Unlock() + + pkt := r.Packet() + if pkt == nil { + log.Warningf("Dropping ICMP packet from VM (no packet data)") + return + } + + // Check if this is an ICMP Echo Request (PING) + transportHeader := pkt.TransportHeader().Slice() + if len(transportHeader) < header.ICMPv4MinimumSize { + log.Warningf("Dropping ICMP packet from VM (packet too short)") + return + } + + icmpHeader := header.ICMPv4(transportHeader) + if icmpHeader.Type() != header.ICMPv4Echo { + // Not a PING, drop it + log.Warningf("Dropping ICMP packet from VM (type %d, not Echo Request)", icmpHeader.Type()) + return + } + + // This is a PING request - forward it using unprivileged ICMP sockets + go handlePingRequest(s, r, localAddress, icmpHeader, pkt) + }) +} + +// HandlePacket handles all packets. +// +// This function is expected to be passed as an argument to the +// stack.SetTransportProtocolHandler function. +func (f *Forwarder) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool { + f.handler(NewICMPForwarderRequest(f.stack, id, pkt.IncRef())) + return true +} + +// NewForwarder allocates and initializes a new forwarder. 
+func NewForwarder(s *stack.Stack, handler func(*ICMPForwarderRequest)) *Forwarder { + return &Forwarder{ + stack: s, + handler: handler, + } +} diff --git a/pkg/services/forwarder/icmp_forwarder.go b/pkg/services/forwarder/icmp_forwarder.go new file mode 100644 index 00000000..bc14c932 --- /dev/null +++ b/pkg/services/forwarder/icmp_forwarder.go @@ -0,0 +1,40 @@ +package forwarder + +import ( + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" +) + +// ICMPForwarderRequest represents a request to forward an ICMP packet. +type ICMPForwarderRequest struct { + stack *stack.Stack + id stack.TransportEndpointID + pkt *stack.PacketBuffer +} + +// NewICMPForwarderRequest creates a new ICMP forwarder request. +func NewICMPForwarderRequest(s *stack.Stack, id stack.TransportEndpointID, pkt *stack.PacketBuffer) *ICMPForwarderRequest { + return &ICMPForwarderRequest{ + stack: s, + id: id, + pkt: pkt, + } +} + +// ID returns the 4-tuple (src address, src port, dst address, dst port) that +// represents the connection request. +func (f *ICMPForwarderRequest) ID() stack.TransportEndpointID { + return f.id +} + +// Packet returns the packet buffer associated with this forwarder request. +func (f *ICMPForwarderRequest) Packet() *stack.PacketBuffer { + return f.pkt +} + +// CreateEndpoint creates a new endpoint for this forwarder request. 
+func (f *ICMPForwarderRequest) CreateEndpoint(s *stack.Stack, wq *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { + // Use the stack's public NewEndpoint API instead of linkname + return s.NewEndpoint(f.pkt.TransportProtocolNumber, f.pkt.NetworkProtocolNumber, wq) +} diff --git a/pkg/services/forwarder/icmp_packet.go b/pkg/services/forwarder/icmp_packet.go new file mode 100644 index 00000000..ef2620c1 --- /dev/null +++ b/pkg/services/forwarder/icmp_packet.go @@ -0,0 +1,253 @@ +package forwarder + +import ( + "fmt" + "net" + + log "github.com/sirupsen/logrus" + netIcmp "golang.org/x/net/icmp" + "golang.org/x/net/ipv4" + "gvisor.dev/gvisor/pkg/buffer" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/checksum" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" +) + +// echoRequestDetails contains the extracted details from an ICMP echo request. +type echoRequestDetails struct { + ident uint16 + seq uint16 + payload []byte + srcAddr tcpip.Address + dataBuf buffer.Buffer +} + +// safeUint16 safely converts an int to uint16, clamping to valid range. +// ICMP ID and sequence numbers are 16-bit values, so values outside this range +// are invalid and will be clamped. +func safeUint16(v int) uint16 { + if v < 0 { + return 0 + } + if v > 0xFFFF { + return 0xFFFF + } + return uint16(v) +} + +// handlePingRequest handles forwarding an ICMP echo request (PING) from the VM +// to the external network and injecting the reply back into the VM. 
+func handlePingRequest(s *stack.Stack, r *ICMPForwarderRequest, destAddr tcpip.Address, icmpHeader header.ICMPv4, pkt *stack.PacketBuffer) { + defer pkt.DecRef() + + // Extract ICMP echo request details + details, err := extractEchoRequestDetails(r, icmpHeader, pkt) + if err != nil { + return + } + defer details.dataBuf.Release() + + // Create ICMP connection + conn, err := createICMPConnection() + if err != nil { + return + } + defer conn.Close() + + // Send the echo request + if err := sendEchoRequest(conn, destAddr, details.ident, details.seq, details.payload); err != nil { + return + } + + // Receive and parse the echo reply + echoReply, err := receiveEchoReply(conn) + if err != nil { + return + } + + // Validate the reply matches our request + if !validateEchoReply(echoReply, details.ident, details.seq) { + return + } + + // Forward the reply back to the VM's network stack + // Safely convert int to uint16 (ICMP ID and Seq are 16-bit values) + forwardEchoReply(s, r, details.srcAddr, destAddr, safeUint16(echoReply.ID), safeUint16(echoReply.Seq), echoReply.Data) +} + +// extractEchoRequestDetails extracts the identifier, sequence, payload, and source address +// from an ICMP echo request packet. +func extractEchoRequestDetails(r *ICMPForwarderRequest, icmpHeader header.ICMPv4, pkt *stack.PacketBuffer) (*echoRequestDetails, error) { + ident := icmpHeader.Ident() + seq := icmpHeader.Sequence() + + // Extract payload data + dataBuf := pkt.Data().ToBuffer() + dataSize := int(dataBuf.Size()) + payload := make([]byte, dataSize) + if dataSize > 0 { + _, _ = dataBuf.ReadAt(payload, 0) + } + + // Get source address from the request + srcAddr := r.ID().RemoteAddress + + return &echoRequestDetails{ + ident: ident, + seq: seq, + payload: payload, + srcAddr: srcAddr, + dataBuf: dataBuf, + }, nil +} + +// sendEchoRequest creates and sends an ICMP echo request message. 
+func sendEchoRequest(conn *netIcmp.PacketConn, destAddr tcpip.Address, ident, seq uint16, payload []byte) error { + // Create ICMP echo request message + msg := &netIcmp.Message{ + Type: ipv4.ICMPTypeEcho, + Code: 0, + Body: &netIcmp.Echo{ + ID: int(ident), + Seq: int(seq), + Data: payload, + }, + } + + // Marshal the message + msgBytes, err := msg.Marshal(nil) + if err != nil { + log.Debugf("Failed to marshal ICMP message: %v", err) + return err + } + + // Parse destination address + dstIP := net.ParseIP(destAddr.String()) + if dstIP == nil { + log.Debugf("Failed to parse destination address: %s", destAddr) + return fmt.Errorf("failed to parse destination address: %s", destAddr) + } + + // Create destination address based on platform + dst := createDestinationAddr(dstIP) + + // Send the ping request + _, err = conn.WriteTo(msgBytes, dst) + if err != nil { + log.Debugf("Failed to send ICMP echo request: %v", err) + return err + } + + return nil +} + +// receiveEchoReply reads and parses an ICMP echo reply from the connection. 
+func receiveEchoReply(conn *netIcmp.PacketConn) (*netIcmp.Echo, error) { + // Read the reply + replyBytes := make([]byte, 1500) + n, _, err := conn.ReadFrom(replyBytes) + if err != nil { + log.Debugf("Failed to receive ICMP echo reply: %v", err) + return nil, err + } + + // Extract ICMP data (skip IP header on Windows) + replyData, err := extractICMPData(replyBytes[:n]) + if err != nil { + return nil, err + } + + // Parse the reply + replyMsg, err := netIcmp.ParseMessage(ipv4.ICMPTypeEchoReply.Protocol(), replyData) + if err != nil { + log.Debugf("Failed to parse ICMP reply: %v", err) + return nil, err + } + + // Check if it's an echo reply + if replyMsg.Type != ipv4.ICMPTypeEchoReply { + log.Debugf("Received ICMP message type %v, expected Echo Reply", replyMsg.Type) + return nil, fmt.Errorf("unexpected ICMP message type: %v", replyMsg.Type) + } + + echoReply, ok := replyMsg.Body.(*netIcmp.Echo) + if !ok { + log.Debugf("ICMP reply body is not an Echo") + return nil, fmt.Errorf("ICMP reply body is not an Echo") + } + + return echoReply, nil +} + +// validateEchoReply verifies that the echo reply matches the original request. +func validateEchoReply(echoReply *netIcmp.Echo, expectedIdent, expectedSeq uint16) bool { + if echoReply.ID != int(expectedIdent) || echoReply.Seq != int(expectedSeq) { + log.Debugf("ICMP reply ID/Seq mismatch: got ID=%d Seq=%d, expected ID=%d Seq=%d", + echoReply.ID, echoReply.Seq, expectedIdent, expectedSeq) + return false + } + return true +} + +// forwardEchoReply creates an ICMP echo reply packet and forwards it back to the VM. 
+func forwardEchoReply(s *stack.Stack, r *ICMPForwarderRequest, dstAddr tcpip.Address, srcAddr tcpip.Address, ident, seq uint16, data []byte) { + // Create ICMP echo reply header + icmpHeaderSize := header.ICMPv4MinimumSize + icmpBuf := make([]byte, icmpHeaderSize+len(data)) + icmpHdr := header.ICMPv4(icmpBuf) + + icmpHdr.SetType(header.ICMPv4EchoReply) + icmpHdr.SetCode(0) + icmpHdr.SetIdent(ident) + icmpHdr.SetSequence(seq) + + // Copy data + if len(data) > 0 { + copy(icmpBuf[icmpHeaderSize:], data) + } + + // Calculate checksum + icmpHdr.SetChecksum(0) + icmpHdr.SetChecksum(^checksum.Checksum(icmpBuf, 0)) + + // Get the original packet's network info + origPkt := r.Packet() + if origPkt == nil { + return + } + + // Find route to send the reply back + // Use srcAddr (the address we pinged) as the local address so the reply appears + // to come from the address the VM originally pinged, not from the gateway + route, err := s.FindRoute(origPkt.NICID, srcAddr, dstAddr, header.IPv4ProtocolNumber, false) + if err != nil { + log.Debugf("Failed to find route for ICMP reply: %v", err) + return + } + defer route.Release() + + // Create packet buffer with ICMP reply + payload := buffer.MakeWithData(icmpBuf) + pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ + ReserveHeaderBytes: int(route.MaxHeaderLength()), + Payload: payload, + }) + defer pkt.DecRef() + + pkt.NetworkProtocolNumber = header.IPv4ProtocolNumber + pkt.TransportProtocolNumber = header.ICMPv4ProtocolNumber + + // Write the packet + params := stack.NetworkHeaderParams{ + Protocol: header.ICMPv4ProtocolNumber, + TTL: 64, + TOS: 0, + } + if err := route.WritePacket(params, pkt); err != nil { + log.Debugf("Failed to forward ICMP echo reply: %v", err) + return + } + + log.Debugf("Successfully forwarded ICMP echo reply to %s", dstAddr) +} diff --git a/pkg/services/forwarder/icmp_packet_unix.go b/pkg/services/forwarder/icmp_packet_unix.go new file mode 100644 index 00000000..d42a5091 --- /dev/null +++ 
b/pkg/services/forwarder/icmp_packet_unix.go @@ -0,0 +1,42 @@ +//go:build !windows + +package forwarder + +import ( + "net" + "time" + + log "github.com/sirupsen/logrus" + netIcmp "golang.org/x/net/icmp" +) + +// createICMPConnection creates an ICMP connection using unprivileged ICMP sockets (udp4) on Linux/macOS. +func createICMPConnection() (*netIcmp.PacketConn, error) { + conn, err := netIcmp.ListenPacket("udp4", "0.0.0.0") + if err != nil { + log.Debugf("Failed to create ICMP connection: %v", err) + return nil, err + } + + // Set read deadline + if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil { + conn.Close() + log.Debugf("Failed to set read deadline: %v", err) + return nil, err + } + + return conn, nil +} + +// createDestinationAddr creates a destination address for Unix unprivileged sockets. +func createDestinationAddr(dstIP net.IP) net.Addr { + // Linux/macOS use net.UDPAddr for unprivileged sockets + return &net.UDPAddr{IP: dstIP, Port: 0} +} + +// extractICMPData extracts ICMP data from the received bytes. +// On Linux/macOS unprivileged sockets, it returns the data as-is. +func extractICMPData(replyBytes []byte) ([]byte, error) { + // Linux/macOS unprivileged sockets return just the ICMP data + return replyBytes, nil +} diff --git a/pkg/services/forwarder/icmp_packet_windows.go b/pkg/services/forwarder/icmp_packet_windows.go new file mode 100644 index 00000000..749441e8 --- /dev/null +++ b/pkg/services/forwarder/icmp_packet_windows.go @@ -0,0 +1,62 @@ +//go:build windows + +package forwarder + +import ( + "fmt" + "net" + "time" + + log "github.com/sirupsen/logrus" + netIcmp "golang.org/x/net/icmp" +) + +// createICMPConnection creates an ICMP connection using privileged raw sockets (ip4:icmp) on Windows. 
+func createICMPConnection() (*netIcmp.PacketConn, error) { + conn, err := netIcmp.ListenPacket("ip4:icmp", "0.0.0.0") + if err != nil { + log.Debugf("Failed to create ICMP connection: %v", err) + return nil, err + } + + // Set read deadline + if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil { + conn.Close() + log.Debugf("Failed to set read deadline: %v", err) + return nil, err + } + + return conn, nil +} + +// createDestinationAddr creates a destination address for Windows raw sockets. +func createDestinationAddr(dstIP net.IP) net.Addr { + // Windows requires net.IPAddr for raw sockets + return &net.IPAddr{IP: dstIP} +} + +// extractICMPData extracts ICMP data from the received bytes. +// On Windows with raw sockets, it skips the IP header. +func extractICMPData(replyBytes []byte) ([]byte, error) { + // Raw sockets on Windows include the IP header, so we need to skip it + if len(replyBytes) < 20 { + log.Debugf("Reply packet too short: %d bytes", len(replyBytes)) + return nil, fmt.Errorf("reply packet too short: %d bytes", len(replyBytes)) + } + + // Check if it's IPv4 (first byte: version and IHL) + version := (replyBytes[0] >> 4) & 0x0F + if version != 4 { + log.Debugf("Unexpected IP version: %d", version) + return nil, fmt.Errorf("unexpected IP version: %d", version) + } + + // Get IP header length (IHL is in the lower 4 bits of first byte, in 4-byte units) + ihl := int(replyBytes[0]&0x0F) * 4 + if ihl < 20 || ihl > len(replyBytes) { + log.Debugf("Invalid IP header length: %d", ihl) + return nil, fmt.Errorf("invalid IP header length: %d", ihl) + } + + return replyBytes[ihl:], nil +} diff --git a/pkg/virtualnetwork/services.go b/pkg/virtualnetwork/services.go index 6c4155e0..f6b0b5e2 100644 --- a/pkg/virtualnetwork/services.go +++ b/pkg/virtualnetwork/services.go @@ -16,6 +16,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" "gvisor.dev/gvisor/pkg/tcpip/stack" + 
"gvisor.dev/gvisor/pkg/tcpip/transport/icmp" "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" "gvisor.dev/gvisor/pkg/tcpip/transport/udp" ) @@ -28,6 +29,8 @@ func addServices(configuration *types.Configuration, s *stack.Stack, ipPool *tap s.SetTransportProtocolHandler(tcp.ProtocolNumber, tcpForwarder.HandlePacket) udpForwarder := forwarder.UDP(s, translation, &natLock) s.SetTransportProtocolHandler(udp.ProtocolNumber, udpForwarder.HandlePacket) + icmpForwarder := forwarder.ICMP(s, translation, &natLock) + s.SetTransportProtocolHandler(icmp.ProtocolNumber4, icmpForwarder.HandlePacket) dnsMux, err := dnsServer(configuration, s) if err != nil { diff --git a/test-vfkit/basic_test.go b/test-vfkit/basic_test.go index 8bbe9349..7fe72a1d 100644 --- a/test-vfkit/basic_test.go +++ b/test-vfkit/basic_test.go @@ -124,4 +124,9 @@ var _ = ginkgo.Describe("ping with gvproxy and vfkit", func() { log.Infof("ping: %s", out) gomega.Expect(err).To(gomega.HaveOccurred()) }) + ginkgo.It("should succeed to ping an localhost", func() { + out, err := sshExec("ping -w2 127.0.0.1") + log.Infof("ping: %s", out) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) }) diff --git a/vendor/golang.org/x/net/icmp/dstunreach.go b/vendor/golang.org/x/net/icmp/dstunreach.go new file mode 100644 index 00000000..8615cf54 --- /dev/null +++ b/vendor/golang.org/x/net/icmp/dstunreach.go @@ -0,0 +1,59 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package icmp + +import ( + "golang.org/x/net/internal/iana" + "golang.org/x/net/ipv4" + "golang.org/x/net/ipv6" +) + +// A DstUnreach represents an ICMP destination unreachable message +// body. +type DstUnreach struct { + Data []byte // data, known as original datagram field + Extensions []Extension // extensions +} + +// Len implements the Len method of MessageBody interface. 
+func (p *DstUnreach) Len(proto int) int { + if p == nil { + return 0 + } + l, _ := multipartMessageBodyDataLen(proto, true, p.Data, p.Extensions) + return l +} + +// Marshal implements the Marshal method of MessageBody interface. +func (p *DstUnreach) Marshal(proto int) ([]byte, error) { + var typ Type + switch proto { + case iana.ProtocolICMP: + typ = ipv4.ICMPTypeDestinationUnreachable + case iana.ProtocolIPv6ICMP: + typ = ipv6.ICMPTypeDestinationUnreachable + default: + return nil, errInvalidProtocol + } + if !validExtensions(typ, p.Extensions) { + return nil, errInvalidExtension + } + return marshalMultipartMessageBody(proto, true, p.Data, p.Extensions) +} + +// parseDstUnreach parses b as an ICMP destination unreachable message +// body. +func parseDstUnreach(proto int, typ Type, b []byte) (MessageBody, error) { + if len(b) < 4 { + return nil, errMessageTooShort + } + p := &DstUnreach{} + var err error + p.Data, p.Extensions, err = parseMultipartMessageBody(proto, typ, b) + if err != nil { + return nil, err + } + return p, nil +} diff --git a/vendor/golang.org/x/net/icmp/echo.go b/vendor/golang.org/x/net/icmp/echo.go new file mode 100644 index 00000000..b5918642 --- /dev/null +++ b/vendor/golang.org/x/net/icmp/echo.go @@ -0,0 +1,173 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package icmp + +import ( + "encoding/binary" + + "golang.org/x/net/internal/iana" + "golang.org/x/net/ipv4" + "golang.org/x/net/ipv6" +) + +// An Echo represents an ICMP echo request or reply message body. +type Echo struct { + ID int // identifier + Seq int // sequence number + Data []byte // data +} + +// Len implements the Len method of MessageBody interface. +func (p *Echo) Len(proto int) int { + if p == nil { + return 0 + } + return 4 + len(p.Data) +} + +// Marshal implements the Marshal method of MessageBody interface. 
+func (p *Echo) Marshal(proto int) ([]byte, error) { + b := make([]byte, 4+len(p.Data)) + binary.BigEndian.PutUint16(b[:2], uint16(p.ID)) + binary.BigEndian.PutUint16(b[2:4], uint16(p.Seq)) + copy(b[4:], p.Data) + return b, nil +} + +// parseEcho parses b as an ICMP echo request or reply message body. +func parseEcho(proto int, _ Type, b []byte) (MessageBody, error) { + bodyLen := len(b) + if bodyLen < 4 { + return nil, errMessageTooShort + } + p := &Echo{ID: int(binary.BigEndian.Uint16(b[:2])), Seq: int(binary.BigEndian.Uint16(b[2:4]))} + if bodyLen > 4 { + p.Data = make([]byte, bodyLen-4) + copy(p.Data, b[4:]) + } + return p, nil +} + +// An ExtendedEchoRequest represents an ICMP extended echo request +// message body. +type ExtendedEchoRequest struct { + ID int // identifier + Seq int // sequence number + Local bool // must be true when identifying by name or index + Extensions []Extension // extensions +} + +// Len implements the Len method of MessageBody interface. +func (p *ExtendedEchoRequest) Len(proto int) int { + if p == nil { + return 0 + } + l, _ := multipartMessageBodyDataLen(proto, false, nil, p.Extensions) + return l +} + +// Marshal implements the Marshal method of MessageBody interface. +func (p *ExtendedEchoRequest) Marshal(proto int) ([]byte, error) { + var typ Type + switch proto { + case iana.ProtocolICMP: + typ = ipv4.ICMPTypeExtendedEchoRequest + case iana.ProtocolIPv6ICMP: + typ = ipv6.ICMPTypeExtendedEchoRequest + default: + return nil, errInvalidProtocol + } + if !validExtensions(typ, p.Extensions) { + return nil, errInvalidExtension + } + b, err := marshalMultipartMessageBody(proto, false, nil, p.Extensions) + if err != nil { + return nil, err + } + binary.BigEndian.PutUint16(b[:2], uint16(p.ID)) + b[2] = byte(p.Seq) + if p.Local { + b[3] |= 0x01 + } + return b, nil +} + +// parseExtendedEchoRequest parses b as an ICMP extended echo request +// message body. 
+func parseExtendedEchoRequest(proto int, typ Type, b []byte) (MessageBody, error) { + if len(b) < 4 { + return nil, errMessageTooShort + } + p := &ExtendedEchoRequest{ID: int(binary.BigEndian.Uint16(b[:2])), Seq: int(b[2])} + if b[3]&0x01 != 0 { + p.Local = true + } + var err error + _, p.Extensions, err = parseMultipartMessageBody(proto, typ, b) + if err != nil { + return nil, err + } + return p, nil +} + +// An ExtendedEchoReply represents an ICMP extended echo reply message +// body. +type ExtendedEchoReply struct { + ID int // identifier + Seq int // sequence number + State int // 3-bit state working together with Message.Code + Active bool // probed interface is active + IPv4 bool // probed interface runs IPv4 + IPv6 bool // probed interface runs IPv6 +} + +// Len implements the Len method of MessageBody interface. +func (p *ExtendedEchoReply) Len(proto int) int { + if p == nil { + return 0 + } + return 4 +} + +// Marshal implements the Marshal method of MessageBody interface. +func (p *ExtendedEchoReply) Marshal(proto int) ([]byte, error) { + b := make([]byte, 4) + binary.BigEndian.PutUint16(b[:2], uint16(p.ID)) + b[2] = byte(p.Seq) + b[3] = byte(p.State<<5) & 0xe0 + if p.Active { + b[3] |= 0x04 + } + if p.IPv4 { + b[3] |= 0x02 + } + if p.IPv6 { + b[3] |= 0x01 + } + return b, nil +} + +// parseExtendedEchoReply parses b as an ICMP extended echo reply +// message body. 
+func parseExtendedEchoReply(proto int, _ Type, b []byte) (MessageBody, error) { + if len(b) < 4 { + return nil, errMessageTooShort + } + p := &ExtendedEchoReply{ + ID: int(binary.BigEndian.Uint16(b[:2])), + Seq: int(b[2]), + State: int(b[3]) >> 5, + } + if b[3]&0x04 != 0 { + p.Active = true + } + if b[3]&0x02 != 0 { + p.IPv4 = true + } + if b[3]&0x01 != 0 { + p.IPv6 = true + } + return p, nil +} diff --git a/vendor/golang.org/x/net/icmp/endpoint.go b/vendor/golang.org/x/net/icmp/endpoint.go new file mode 100644 index 00000000..47f5b698 --- /dev/null +++ b/vendor/golang.org/x/net/icmp/endpoint.go @@ -0,0 +1,113 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package icmp + +import ( + "net" + "runtime" + "time" + + "golang.org/x/net/ipv4" + "golang.org/x/net/ipv6" +) + +var _ net.PacketConn = &PacketConn{} + +// A PacketConn represents a packet network endpoint that uses either +// ICMPv4 or ICMPv6. +type PacketConn struct { + c net.PacketConn + p4 *ipv4.PacketConn + p6 *ipv6.PacketConn +} + +func (c *PacketConn) ok() bool { return c != nil && c.c != nil } + +// IPv4PacketConn returns the ipv4.PacketConn of c. +// It returns nil when c is not created as the endpoint for ICMPv4. +func (c *PacketConn) IPv4PacketConn() *ipv4.PacketConn { + if !c.ok() { + return nil + } + return c.p4 +} + +// IPv6PacketConn returns the ipv6.PacketConn of c. +// It returns nil when c is not created as the endpoint for ICMPv6. +func (c *PacketConn) IPv6PacketConn() *ipv6.PacketConn { + if !c.ok() { + return nil + } + return c.p6 +} + +// ReadFrom reads an ICMP message from the connection. +func (c *PacketConn) ReadFrom(b []byte) (int, net.Addr, error) { + if !c.ok() { + return 0, nil, errInvalidConn + } + // Please be informed that ipv4.NewPacketConn enables + // IP_STRIPHDR option by default on Darwin. + // See golang.org/issue/9395 for further information. 
+ if (runtime.GOOS == "darwin" || runtime.GOOS == "ios") && c.p4 != nil { + n, _, peer, err := c.p4.ReadFrom(b) + return n, peer, err + } + return c.c.ReadFrom(b) +} + +// WriteTo writes the ICMP message b to dst. +// The provided dst must be net.UDPAddr when c is a non-privileged +// datagram-oriented ICMP endpoint. +// Otherwise it must be net.IPAddr. +func (c *PacketConn) WriteTo(b []byte, dst net.Addr) (int, error) { + if !c.ok() { + return 0, errInvalidConn + } + return c.c.WriteTo(b, dst) +} + +// Close closes the endpoint. +func (c *PacketConn) Close() error { + if !c.ok() { + return errInvalidConn + } + return c.c.Close() +} + +// LocalAddr returns the local network address. +func (c *PacketConn) LocalAddr() net.Addr { + if !c.ok() { + return nil + } + return c.c.LocalAddr() +} + +// SetDeadline sets the read and write deadlines associated with the +// endpoint. +func (c *PacketConn) SetDeadline(t time.Time) error { + if !c.ok() { + return errInvalidConn + } + return c.c.SetDeadline(t) +} + +// SetReadDeadline sets the read deadline associated with the +// endpoint. +func (c *PacketConn) SetReadDeadline(t time.Time) error { + if !c.ok() { + return errInvalidConn + } + return c.c.SetReadDeadline(t) +} + +// SetWriteDeadline sets the write deadline associated with the +// endpoint. +func (c *PacketConn) SetWriteDeadline(t time.Time) error { + if !c.ok() { + return errInvalidConn + } + return c.c.SetWriteDeadline(t) +} diff --git a/vendor/golang.org/x/net/icmp/extension.go b/vendor/golang.org/x/net/icmp/extension.go new file mode 100644 index 00000000..eeb85c3f --- /dev/null +++ b/vendor/golang.org/x/net/icmp/extension.go @@ -0,0 +1,170 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package icmp + +import ( + "encoding/binary" + + "golang.org/x/net/ipv4" + "golang.org/x/net/ipv6" +) + +// An Extension represents an ICMP extension. 
+type Extension interface { + // Len returns the length of ICMP extension. + // The provided proto must be either the ICMPv4 or ICMPv6 + // protocol number. + Len(proto int) int + + // Marshal returns the binary encoding of ICMP extension. + // The provided proto must be either the ICMPv4 or ICMPv6 + // protocol number. + Marshal(proto int) ([]byte, error) +} + +const extensionVersion = 2 + +func validExtensionHeader(b []byte) bool { + v := int(b[0]&0xf0) >> 4 + s := binary.BigEndian.Uint16(b[2:4]) + if s != 0 { + s = checksum(b) + } + if v != extensionVersion || s != 0 { + return false + } + return true +} + +// parseExtensions parses b as a list of ICMP extensions. +// The length attribute l must be the length attribute field in +// received icmp messages. +// +// It will return a list of ICMP extensions and an adjusted length +// attribute that represents the length of the padded original +// datagram field. Otherwise, it returns an error. +func parseExtensions(typ Type, b []byte, l int) ([]Extension, int, error) { + // Still a lot of non-RFC 4884 compliant implementations are + // out there. Set the length attribute l to 128 when it looks + // inappropriate for backwards compatibility. + // + // A minimal extension at least requires 8 octets; 4 octets + // for an extension header, and 4 octets for a single object + // header. + // + // See RFC 4884 for further information. 
+ switch typ { + case ipv4.ICMPTypeExtendedEchoRequest, ipv6.ICMPTypeExtendedEchoRequest: + if len(b) < 8 || !validExtensionHeader(b) { + return nil, -1, errNoExtension + } + l = 0 + default: + if 128 > l || l+8 > len(b) { + l = 128 + } + if l+8 > len(b) { + return nil, -1, errNoExtension + } + if !validExtensionHeader(b[l:]) { + if l == 128 { + return nil, -1, errNoExtension + } + l = 128 + if !validExtensionHeader(b[l:]) { + return nil, -1, errNoExtension + } + } + } + var exts []Extension + for b = b[l+4:]; len(b) >= 4; { + ol := int(binary.BigEndian.Uint16(b[:2])) + if 4 > ol || ol > len(b) { + break + } + switch b[2] { + case classMPLSLabelStack: + ext, err := parseMPLSLabelStack(b[:ol]) + if err != nil { + return nil, -1, err + } + exts = append(exts, ext) + case classInterfaceInfo: + ext, err := parseInterfaceInfo(b[:ol]) + if err != nil { + return nil, -1, err + } + exts = append(exts, ext) + case classInterfaceIdent: + ext, err := parseInterfaceIdent(b[:ol]) + if err != nil { + return nil, -1, err + } + exts = append(exts, ext) + default: + ext := &RawExtension{Data: make([]byte, ol)} + copy(ext.Data, b[:ol]) + exts = append(exts, ext) + } + b = b[ol:] + } + return exts, l, nil +} + +func validExtensions(typ Type, exts []Extension) bool { + switch typ { + case ipv4.ICMPTypeDestinationUnreachable, ipv4.ICMPTypeTimeExceeded, ipv4.ICMPTypeParameterProblem, + ipv6.ICMPTypeDestinationUnreachable, ipv6.ICMPTypeTimeExceeded: + for i := range exts { + switch exts[i].(type) { + case *MPLSLabelStack, *InterfaceInfo, *RawExtension: + default: + return false + } + } + return true + case ipv4.ICMPTypeExtendedEchoRequest, ipv6.ICMPTypeExtendedEchoRequest: + var n int + for i := range exts { + switch exts[i].(type) { + case *InterfaceIdent: + n++ + case *RawExtension: + default: + return false + } + } + // Not a single InterfaceIdent object or a combo of + // RawExtension and InterfaceIdent objects is not + // allowed. 
+ if n == 1 && len(exts) > 1 { + return false + } + return true + default: + return false + } +} + +// A RawExtension represents a raw extension. +// +// A raw extension is excluded from message processing and can be used +// to construct applications such as protocol conformance testing. +type RawExtension struct { + Data []byte // data +} + +// Len implements the Len method of Extension interface. +func (p *RawExtension) Len(proto int) int { + if p == nil { + return 0 + } + return len(p.Data) +} + +// Marshal implements the Marshal method of Extension interface. +func (p *RawExtension) Marshal(proto int) ([]byte, error) { + return p.Data, nil +} diff --git a/vendor/golang.org/x/net/icmp/helper_posix.go b/vendor/golang.org/x/net/icmp/helper_posix.go new file mode 100644 index 00000000..f625483f --- /dev/null +++ b/vendor/golang.org/x/net/icmp/helper_posix.go @@ -0,0 +1,75 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || windows + +package icmp + +import ( + "net" + "strconv" + "syscall" +) + +func sockaddr(family int, address string) (syscall.Sockaddr, error) { + switch family { + case syscall.AF_INET: + a, err := net.ResolveIPAddr("ip4", address) + if err != nil { + return nil, err + } + if len(a.IP) == 0 { + a.IP = net.IPv4zero + } + if a.IP = a.IP.To4(); a.IP == nil { + return nil, net.InvalidAddrError("non-ipv4 address") + } + sa := &syscall.SockaddrInet4{} + copy(sa.Addr[:], a.IP) + return sa, nil + case syscall.AF_INET6: + a, err := net.ResolveIPAddr("ip6", address) + if err != nil { + return nil, err + } + if len(a.IP) == 0 { + a.IP = net.IPv6unspecified + } + if a.IP.Equal(net.IPv4zero) { + a.IP = net.IPv6unspecified + } + if a.IP = a.IP.To16(); a.IP == nil || a.IP.To4() != nil { + return nil, net.InvalidAddrError("non-ipv6 address") + } + sa := &syscall.SockaddrInet6{ZoneId: zoneToUint32(a.Zone)} + copy(sa.Addr[:], a.IP) + return sa, nil + default: + return nil, net.InvalidAddrError("unexpected family") + } +} + +func zoneToUint32(zone string) uint32 { + if zone == "" { + return 0 + } + if ifi, err := net.InterfaceByName(zone); err == nil { + return uint32(ifi.Index) + } + n, err := strconv.Atoi(zone) + if err != nil { + return 0 + } + return uint32(n) +} + +func last(s string, b byte) int { + i := len(s) + for i--; i >= 0; i-- { + if s[i] == b { + break + } + } + return i +} diff --git a/vendor/golang.org/x/net/icmp/interface.go b/vendor/golang.org/x/net/icmp/interface.go new file mode 100644 index 00000000..b3dd72fb --- /dev/null +++ b/vendor/golang.org/x/net/icmp/interface.go @@ -0,0 +1,322 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package icmp + +import ( + "encoding/binary" + "net" + "strings" + + "golang.org/x/net/internal/iana" +) + +const ( + classInterfaceInfo = 2 +) + +const ( + attrMTU = 1 << iota + attrName + attrIPAddr + attrIfIndex +) + +// An InterfaceInfo represents interface and next-hop identification. +type InterfaceInfo struct { + Class int // extension object class number + Type int // extension object sub-type + Interface *net.Interface + Addr *net.IPAddr +} + +func (ifi *InterfaceInfo) nameLen() int { + if len(ifi.Interface.Name) > 63 { + return 64 + } + l := 1 + len(ifi.Interface.Name) + return (l + 3) &^ 3 +} + +func (ifi *InterfaceInfo) attrsAndLen(proto int) (attrs, l int) { + l = 4 + if ifi.Interface != nil && ifi.Interface.Index > 0 { + attrs |= attrIfIndex + l += 4 + if len(ifi.Interface.Name) > 0 { + attrs |= attrName + l += ifi.nameLen() + } + if ifi.Interface.MTU > 0 { + attrs |= attrMTU + l += 4 + } + } + if ifi.Addr != nil { + switch proto { + case iana.ProtocolICMP: + if ifi.Addr.IP.To4() != nil { + attrs |= attrIPAddr + l += 4 + net.IPv4len + } + case iana.ProtocolIPv6ICMP: + if ifi.Addr.IP.To16() != nil && ifi.Addr.IP.To4() == nil { + attrs |= attrIPAddr + l += 4 + net.IPv6len + } + } + } + return +} + +// Len implements the Len method of Extension interface. +func (ifi *InterfaceInfo) Len(proto int) int { + _, l := ifi.attrsAndLen(proto) + return l +} + +// Marshal implements the Marshal method of Extension interface. 
+func (ifi *InterfaceInfo) Marshal(proto int) ([]byte, error) { + attrs, l := ifi.attrsAndLen(proto) + b := make([]byte, l) + if err := ifi.marshal(proto, b, attrs, l); err != nil { + return nil, err + } + return b, nil +} + +func (ifi *InterfaceInfo) marshal(proto int, b []byte, attrs, l int) error { + binary.BigEndian.PutUint16(b[:2], uint16(l)) + b[2], b[3] = classInterfaceInfo, byte(ifi.Type) + for b = b[4:]; len(b) > 0 && attrs != 0; { + switch { + case attrs&attrIfIndex != 0: + b = ifi.marshalIfIndex(proto, b) + attrs &^= attrIfIndex + case attrs&attrIPAddr != 0: + b = ifi.marshalIPAddr(proto, b) + attrs &^= attrIPAddr + case attrs&attrName != 0: + b = ifi.marshalName(proto, b) + attrs &^= attrName + case attrs&attrMTU != 0: + b = ifi.marshalMTU(proto, b) + attrs &^= attrMTU + } + } + return nil +} + +func (ifi *InterfaceInfo) marshalIfIndex(proto int, b []byte) []byte { + binary.BigEndian.PutUint32(b[:4], uint32(ifi.Interface.Index)) + return b[4:] +} + +func (ifi *InterfaceInfo) parseIfIndex(b []byte) ([]byte, error) { + if len(b) < 4 { + return nil, errMessageTooShort + } + ifi.Interface.Index = int(binary.BigEndian.Uint32(b[:4])) + return b[4:], nil +} + +func (ifi *InterfaceInfo) marshalIPAddr(proto int, b []byte) []byte { + switch proto { + case iana.ProtocolICMP: + binary.BigEndian.PutUint16(b[:2], uint16(iana.AddrFamilyIPv4)) + copy(b[4:4+net.IPv4len], ifi.Addr.IP.To4()) + b = b[4+net.IPv4len:] + case iana.ProtocolIPv6ICMP: + binary.BigEndian.PutUint16(b[:2], uint16(iana.AddrFamilyIPv6)) + copy(b[4:4+net.IPv6len], ifi.Addr.IP.To16()) + b = b[4+net.IPv6len:] + } + return b +} + +func (ifi *InterfaceInfo) parseIPAddr(b []byte) ([]byte, error) { + if len(b) < 4 { + return nil, errMessageTooShort + } + afi := int(binary.BigEndian.Uint16(b[:2])) + b = b[4:] + switch afi { + case iana.AddrFamilyIPv4: + if len(b) < net.IPv4len { + return nil, errMessageTooShort + } + ifi.Addr.IP = make(net.IP, net.IPv4len) + copy(ifi.Addr.IP, b[:net.IPv4len]) + b = 
b[net.IPv4len:] + case iana.AddrFamilyIPv6: + if len(b) < net.IPv6len { + return nil, errMessageTooShort + } + ifi.Addr.IP = make(net.IP, net.IPv6len) + copy(ifi.Addr.IP, b[:net.IPv6len]) + b = b[net.IPv6len:] + } + return b, nil +} + +func (ifi *InterfaceInfo) marshalName(proto int, b []byte) []byte { + l := byte(ifi.nameLen()) + b[0] = l + copy(b[1:], []byte(ifi.Interface.Name)) + return b[l:] +} + +func (ifi *InterfaceInfo) parseName(b []byte) ([]byte, error) { + if 4 > len(b) || len(b) < int(b[0]) { + return nil, errMessageTooShort + } + l := int(b[0]) + if l%4 != 0 || 4 > l || l > 64 { + return nil, errInvalidExtension + } + var name [63]byte + copy(name[:], b[1:l]) + ifi.Interface.Name = strings.Trim(string(name[:]), "\000") + return b[l:], nil +} + +func (ifi *InterfaceInfo) marshalMTU(proto int, b []byte) []byte { + binary.BigEndian.PutUint32(b[:4], uint32(ifi.Interface.MTU)) + return b[4:] +} + +func (ifi *InterfaceInfo) parseMTU(b []byte) ([]byte, error) { + if len(b) < 4 { + return nil, errMessageTooShort + } + ifi.Interface.MTU = int(binary.BigEndian.Uint32(b[:4])) + return b[4:], nil +} + +func parseInterfaceInfo(b []byte) (Extension, error) { + ifi := &InterfaceInfo{ + Class: int(b[2]), + Type: int(b[3]), + } + if ifi.Type&(attrIfIndex|attrName|attrMTU) != 0 { + ifi.Interface = &net.Interface{} + } + if ifi.Type&attrIPAddr != 0 { + ifi.Addr = &net.IPAddr{} + } + attrs := ifi.Type & (attrIfIndex | attrIPAddr | attrName | attrMTU) + for b = b[4:]; len(b) > 0 && attrs != 0; { + var err error + switch { + case attrs&attrIfIndex != 0: + b, err = ifi.parseIfIndex(b) + attrs &^= attrIfIndex + case attrs&attrIPAddr != 0: + b, err = ifi.parseIPAddr(b) + attrs &^= attrIPAddr + case attrs&attrName != 0: + b, err = ifi.parseName(b) + attrs &^= attrName + case attrs&attrMTU != 0: + b, err = ifi.parseMTU(b) + attrs &^= attrMTU + } + if err != nil { + return nil, err + } + } + if ifi.Interface != nil && ifi.Interface.Name != "" && ifi.Addr != nil && 
ifi.Addr.IP.To16() != nil && ifi.Addr.IP.To4() == nil { + ifi.Addr.Zone = ifi.Interface.Name + } + return ifi, nil +} + +const ( + classInterfaceIdent = 3 + typeInterfaceByName = 1 + typeInterfaceByIndex = 2 + typeInterfaceByAddress = 3 +) + +// An InterfaceIdent represents interface identification. +type InterfaceIdent struct { + Class int // extension object class number + Type int // extension object sub-type + Name string // interface name + Index int // interface index + AFI int // address family identifier; see address family numbers in IANA registry + Addr []byte // address +} + +// Len implements the Len method of Extension interface. +func (ifi *InterfaceIdent) Len(_ int) int { + switch ifi.Type { + case typeInterfaceByName: + l := len(ifi.Name) + if l > 255 { + l = 255 + } + return 4 + (l+3)&^3 + case typeInterfaceByIndex: + return 4 + 4 + case typeInterfaceByAddress: + return 4 + 4 + (len(ifi.Addr)+3)&^3 + default: + return 4 + } +} + +// Marshal implements the Marshal method of Extension interface. 
+func (ifi *InterfaceIdent) Marshal(proto int) ([]byte, error) { + b := make([]byte, ifi.Len(proto)) + if err := ifi.marshal(proto, b); err != nil { + return nil, err + } + return b, nil +} + +func (ifi *InterfaceIdent) marshal(proto int, b []byte) error { + l := ifi.Len(proto) + binary.BigEndian.PutUint16(b[:2], uint16(l)) + b[2], b[3] = classInterfaceIdent, byte(ifi.Type) + switch ifi.Type { + case typeInterfaceByName: + copy(b[4:], ifi.Name) + case typeInterfaceByIndex: + binary.BigEndian.PutUint32(b[4:4+4], uint32(ifi.Index)) + case typeInterfaceByAddress: + binary.BigEndian.PutUint16(b[4:4+2], uint16(ifi.AFI)) + b[4+2] = byte(len(ifi.Addr)) + copy(b[4+4:], ifi.Addr) + } + return nil +} + +func parseInterfaceIdent(b []byte) (Extension, error) { + ifi := &InterfaceIdent{ + Class: int(b[2]), + Type: int(b[3]), + } + switch ifi.Type { + case typeInterfaceByName: + ifi.Name = strings.Trim(string(b[4:]), "\x00") + case typeInterfaceByIndex: + if len(b[4:]) < 4 { + return nil, errInvalidExtension + } + ifi.Index = int(binary.BigEndian.Uint32(b[4 : 4+4])) + case typeInterfaceByAddress: + if len(b[4:]) < 4 { + return nil, errInvalidExtension + } + ifi.AFI = int(binary.BigEndian.Uint16(b[4 : 4+2])) + l := int(b[4+2]) + if len(b[4+4:]) < l { + return nil, errInvalidExtension + } + ifi.Addr = make([]byte, l) + copy(ifi.Addr, b[4+4:]) + } + return ifi, nil +} diff --git a/vendor/golang.org/x/net/icmp/ipv4.go b/vendor/golang.org/x/net/icmp/ipv4.go new file mode 100644 index 00000000..1ce31f14 --- /dev/null +++ b/vendor/golang.org/x/net/icmp/ipv4.go @@ -0,0 +1,68 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package icmp + +import ( + "encoding/binary" + "net" + "runtime" + + "golang.org/x/net/ipv4" +) + +// freebsdVersion is set in sys_freebsd.go. +// See http://www.freebsd.org/doc/en/books/porters-handbook/freebsd-versions.html. 
+var freebsdVersion uint32 + +// ParseIPv4Header returns the IPv4 header of the IPv4 packet that +// triggered an ICMP error message. +// This is found in the Data field of the ICMP error message body. +// +// The provided b must be in the format used by a raw ICMP socket on +// the local system. +// This may differ from the wire format, and the format used by a raw +// IP socket, depending on the system. +// +// To parse an IPv6 header, use ipv6.ParseHeader. +func ParseIPv4Header(b []byte) (*ipv4.Header, error) { + if len(b) < ipv4.HeaderLen { + return nil, errHeaderTooShort + } + hdrlen := int(b[0]&0x0f) << 2 + if hdrlen > len(b) { + return nil, errBufferTooShort + } + h := &ipv4.Header{ + Version: int(b[0] >> 4), + Len: hdrlen, + TOS: int(b[1]), + ID: int(binary.BigEndian.Uint16(b[4:6])), + FragOff: int(binary.BigEndian.Uint16(b[6:8])), + TTL: int(b[8]), + Protocol: int(b[9]), + Checksum: int(binary.BigEndian.Uint16(b[10:12])), + Src: net.IPv4(b[12], b[13], b[14], b[15]), + Dst: net.IPv4(b[16], b[17], b[18], b[19]), + } + switch runtime.GOOS { + case "darwin", "ios": + h.TotalLen = int(binary.NativeEndian.Uint16(b[2:4])) + case "freebsd": + if freebsdVersion >= 1000000 { + h.TotalLen = int(binary.BigEndian.Uint16(b[2:4])) + } else { + h.TotalLen = int(binary.NativeEndian.Uint16(b[2:4])) + } + default: + h.TotalLen = int(binary.BigEndian.Uint16(b[2:4])) + } + h.Flags = ipv4.HeaderFlags(h.FragOff&0xe000) >> 13 + h.FragOff = h.FragOff & 0x1fff + if hdrlen-ipv4.HeaderLen > 0 { + h.Options = make([]byte, hdrlen-ipv4.HeaderLen) + copy(h.Options, b[ipv4.HeaderLen:]) + } + return h, nil +} diff --git a/vendor/golang.org/x/net/icmp/ipv6.go b/vendor/golang.org/x/net/icmp/ipv6.go new file mode 100644 index 00000000..2e8cfeb1 --- /dev/null +++ b/vendor/golang.org/x/net/icmp/ipv6.go @@ -0,0 +1,23 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package icmp + +import ( + "net" + + "golang.org/x/net/internal/iana" +) + +const ipv6PseudoHeaderLen = 2*net.IPv6len + 8 + +// IPv6PseudoHeader returns an IPv6 pseudo header for checksum +// calculation. +func IPv6PseudoHeader(src, dst net.IP) []byte { + b := make([]byte, ipv6PseudoHeaderLen) + copy(b, src.To16()) + copy(b[net.IPv6len:], dst.To16()) + b[len(b)-1] = byte(iana.ProtocolIPv6ICMP) + return b +} diff --git a/vendor/golang.org/x/net/icmp/listen_posix.go b/vendor/golang.org/x/net/icmp/listen_posix.go new file mode 100644 index 00000000..b7cb15b7 --- /dev/null +++ b/vendor/golang.org/x/net/icmp/listen_posix.go @@ -0,0 +1,105 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || windows + +package icmp + +import ( + "net" + "os" + "runtime" + "syscall" + + "golang.org/x/net/internal/iana" + "golang.org/x/net/ipv4" + "golang.org/x/net/ipv6" +) + +const sysIP_STRIPHDR = 0x17 // for now only darwin supports this option + +// ListenPacket listens for incoming ICMP packets addressed to +// address. See net.Dial for the syntax of address. +// +// For non-privileged datagram-oriented ICMP endpoints, network must +// be "udp4" or "udp6". The endpoint allows to read, write a few +// limited ICMP messages such as echo request and echo reply. +// Currently only Darwin and Linux support this. +// +// Examples: +// +// ListenPacket("udp4", "192.168.0.1") +// ListenPacket("udp4", "0.0.0.0") +// ListenPacket("udp6", "fe80::1%en0") +// ListenPacket("udp6", "::") +// +// For privileged raw ICMP endpoints, network must be "ip4" or "ip6" +// followed by a colon and an ICMP protocol number or name. 
+// +// Examples: +// +// ListenPacket("ip4:icmp", "192.168.0.1") +// ListenPacket("ip4:1", "0.0.0.0") +// ListenPacket("ip6:ipv6-icmp", "fe80::1%en0") +// ListenPacket("ip6:58", "::") +func ListenPacket(network, address string) (*PacketConn, error) { + var family, proto int + switch network { + case "udp4": + family, proto = syscall.AF_INET, iana.ProtocolICMP + case "udp6": + family, proto = syscall.AF_INET6, iana.ProtocolIPv6ICMP + default: + i := last(network, ':') + if i < 0 { + i = len(network) + } + switch network[:i] { + case "ip4": + proto = iana.ProtocolICMP + case "ip6": + proto = iana.ProtocolIPv6ICMP + } + } + var cerr error + var c net.PacketConn + switch family { + case syscall.AF_INET, syscall.AF_INET6: + s, err := syscall.Socket(family, syscall.SOCK_DGRAM, proto) + if err != nil { + return nil, os.NewSyscallError("socket", err) + } + if (runtime.GOOS == "darwin" || runtime.GOOS == "ios") && family == syscall.AF_INET { + if err := syscall.SetsockoptInt(s, iana.ProtocolIP, sysIP_STRIPHDR, 1); err != nil { + syscall.Close(s) + return nil, os.NewSyscallError("setsockopt", err) + } + } + sa, err := sockaddr(family, address) + if err != nil { + syscall.Close(s) + return nil, err + } + if err := syscall.Bind(s, sa); err != nil { + syscall.Close(s) + return nil, os.NewSyscallError("bind", err) + } + f := os.NewFile(uintptr(s), "datagram-oriented icmp") + c, cerr = net.FilePacketConn(f) + f.Close() + default: + c, cerr = net.ListenPacket(network, address) + } + if cerr != nil { + return nil, cerr + } + switch proto { + case iana.ProtocolICMP: + return &PacketConn{c: c, p4: ipv4.NewPacketConn(c)}, nil + case iana.ProtocolIPv6ICMP: + return &PacketConn{c: c, p6: ipv6.NewPacketConn(c)}, nil + default: + return &PacketConn{c: c}, nil + } +} diff --git a/vendor/golang.org/x/net/icmp/listen_stub.go b/vendor/golang.org/x/net/icmp/listen_stub.go new file mode 100644 index 00000000..7b76be1c --- /dev/null +++ b/vendor/golang.org/x/net/icmp/listen_stub.go @@ -0,0 
+1,35 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows + +package icmp + +// ListenPacket listens for incoming ICMP packets addressed to +// address. See net.Dial for the syntax of address. +// +// For non-privileged datagram-oriented ICMP endpoints, network must +// be "udp4" or "udp6". The endpoint allows to read, write a few +// limited ICMP messages such as echo request and echo reply. +// Currently only Darwin and Linux support this. +// +// Examples: +// +// ListenPacket("udp4", "192.168.0.1") +// ListenPacket("udp4", "0.0.0.0") +// ListenPacket("udp6", "fe80::1%en0") +// ListenPacket("udp6", "::") +// +// For privileged raw ICMP endpoints, network must be "ip4" or "ip6" +// followed by a colon and an ICMP protocol number or name. +// +// Examples: +// +// ListenPacket("ip4:icmp", "192.168.0.1") +// ListenPacket("ip4:1", "0.0.0.0") +// ListenPacket("ip6:ipv6-icmp", "fe80::1%en0") +// ListenPacket("ip6:58", "::") +func ListenPacket(network, address string) (*PacketConn, error) { + return nil, errNotImplemented +} diff --git a/vendor/golang.org/x/net/icmp/message.go b/vendor/golang.org/x/net/icmp/message.go new file mode 100644 index 00000000..40db65d0 --- /dev/null +++ b/vendor/golang.org/x/net/icmp/message.go @@ -0,0 +1,162 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package icmp provides basic functions for the manipulation of +// messages used in the Internet Control Message Protocols, +// ICMPv4 and ICMPv6. +// +// ICMPv4 and ICMPv6 are defined in RFC 792 and RFC 4443. +// Multi-part message support for ICMP is defined in RFC 4884. +// ICMP extensions for MPLS are defined in RFC 4950. 
+// ICMP extensions for interface and next-hop identification are +// defined in RFC 5837. +// PROBE: A utility for probing interfaces is defined in RFC 8335. +package icmp // import "golang.org/x/net/icmp" + +import ( + "encoding/binary" + "errors" + "net" + "runtime" + + "golang.org/x/net/internal/iana" + "golang.org/x/net/ipv4" + "golang.org/x/net/ipv6" +) + +// BUG(mikio): This package is not implemented on JS, NaCl and Plan 9. + +var ( + errInvalidConn = errors.New("invalid connection") + errInvalidProtocol = errors.New("invalid protocol") + errMessageTooShort = errors.New("message too short") + errHeaderTooShort = errors.New("header too short") + errBufferTooShort = errors.New("buffer too short") + errInvalidBody = errors.New("invalid body") + errNoExtension = errors.New("no extension") + errInvalidExtension = errors.New("invalid extension") + errNotImplemented = errors.New("not implemented on " + runtime.GOOS + "/" + runtime.GOARCH) +) + +func checksum(b []byte) uint16 { + csumcv := len(b) - 1 // checksum coverage + s := uint32(0) + for i := 0; i < csumcv; i += 2 { + s += uint32(b[i+1])<<8 | uint32(b[i]) + } + if csumcv&1 == 0 { + s += uint32(b[csumcv]) + } + s = s>>16 + s&0xffff + s = s + s>>16 + return ^uint16(s) +} + +// A Type represents an ICMP message type. +type Type interface { + Protocol() int +} + +// A Message represents an ICMP message. +type Message struct { + Type Type // type, either ipv4.ICMPType or ipv6.ICMPType + Code int // code + Checksum int // checksum + Body MessageBody // body +} + +// Marshal returns the binary encoding of the ICMP message m. +// +// For an ICMPv4 message, the returned message always contains the +// calculated checksum field. +// +// For an ICMPv6 message, the returned message contains the calculated +// checksum field when psh is not nil, otherwise the kernel will +// compute the checksum field during the message transmission. +// When psh is not nil, it must be the pseudo header for IPv6. 
+func (m *Message) Marshal(psh []byte) ([]byte, error) { + var mtype byte + switch typ := m.Type.(type) { + case ipv4.ICMPType: + mtype = byte(typ) + case ipv6.ICMPType: + mtype = byte(typ) + default: + return nil, errInvalidProtocol + } + b := []byte{mtype, byte(m.Code), 0, 0} + proto := m.Type.Protocol() + if proto == iana.ProtocolIPv6ICMP && psh != nil { + b = append(psh, b...) + } + if m.Body != nil && m.Body.Len(proto) != 0 { + mb, err := m.Body.Marshal(proto) + if err != nil { + return nil, err + } + b = append(b, mb...) + } + if proto == iana.ProtocolIPv6ICMP { + if psh == nil { // cannot calculate checksum here + return b, nil + } + off, l := 2*net.IPv6len, len(b)-len(psh) + binary.BigEndian.PutUint32(b[off:off+4], uint32(l)) + } + s := checksum(b) + // Place checksum back in header; using ^= avoids the + // assumption the checksum bytes are zero. + b[len(psh)+2] ^= byte(s) + b[len(psh)+3] ^= byte(s >> 8) + return b[len(psh):], nil +} + +var parseFns = map[Type]func(int, Type, []byte) (MessageBody, error){ + ipv4.ICMPTypeDestinationUnreachable: parseDstUnreach, + ipv4.ICMPTypeTimeExceeded: parseTimeExceeded, + ipv4.ICMPTypeParameterProblem: parseParamProb, + + ipv4.ICMPTypeEcho: parseEcho, + ipv4.ICMPTypeEchoReply: parseEcho, + ipv4.ICMPTypeExtendedEchoRequest: parseExtendedEchoRequest, + ipv4.ICMPTypeExtendedEchoReply: parseExtendedEchoReply, + + ipv6.ICMPTypeDestinationUnreachable: parseDstUnreach, + ipv6.ICMPTypePacketTooBig: parsePacketTooBig, + ipv6.ICMPTypeTimeExceeded: parseTimeExceeded, + ipv6.ICMPTypeParameterProblem: parseParamProb, + + ipv6.ICMPTypeEchoRequest: parseEcho, + ipv6.ICMPTypeEchoReply: parseEcho, + ipv6.ICMPTypeExtendedEchoRequest: parseExtendedEchoRequest, + ipv6.ICMPTypeExtendedEchoReply: parseExtendedEchoReply, +} + +// ParseMessage parses b as an ICMP message. +// The provided proto must be either the ICMPv4 or ICMPv6 protocol +// number. 
+func ParseMessage(proto int, b []byte) (*Message, error) { + if len(b) < 4 { + return nil, errMessageTooShort + } + var err error + m := &Message{Code: int(b[1]), Checksum: int(binary.BigEndian.Uint16(b[2:4]))} + switch proto { + case iana.ProtocolICMP: + m.Type = ipv4.ICMPType(b[0]) + case iana.ProtocolIPv6ICMP: + m.Type = ipv6.ICMPType(b[0]) + default: + return nil, errInvalidProtocol + } + if fn, ok := parseFns[m.Type]; !ok { + m.Body, err = parseRawBody(proto, b[4:]) + } else { + m.Body, err = fn(proto, m.Type, b[4:]) + } + if err != nil { + return nil, err + } + return m, nil +} diff --git a/vendor/golang.org/x/net/icmp/messagebody.go b/vendor/golang.org/x/net/icmp/messagebody.go new file mode 100644 index 00000000..e2d9bfa0 --- /dev/null +++ b/vendor/golang.org/x/net/icmp/messagebody.go @@ -0,0 +1,52 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package icmp + +// A MessageBody represents an ICMP message body. +type MessageBody interface { + // Len returns the length of ICMP message body. + // The provided proto must be either the ICMPv4 or ICMPv6 + // protocol number. + Len(proto int) int + + // Marshal returns the binary encoding of ICMP message body. + // The provided proto must be either the ICMPv4 or ICMPv6 + // protocol number. + Marshal(proto int) ([]byte, error) +} + +// A RawBody represents a raw message body. +// +// A raw message body is excluded from message processing and can be +// used to construct applications such as protocol conformance +// testing. +type RawBody struct { + Data []byte // data +} + +// Len implements the Len method of MessageBody interface. +func (p *RawBody) Len(proto int) int { + if p == nil { + return 0 + } + return len(p.Data) +} + +// Marshal implements the Marshal method of MessageBody interface. 
+func (p *RawBody) Marshal(proto int) ([]byte, error) { + return p.Data, nil +} + +// parseRawBody parses b as an ICMP message body. +func parseRawBody(proto int, b []byte) (MessageBody, error) { + p := &RawBody{Data: make([]byte, len(b))} + copy(p.Data, b) + return p, nil +} + +// A DefaultMessageBody represents the default message body. +// +// Deprecated: Use RawBody instead. +type DefaultMessageBody = RawBody diff --git a/vendor/golang.org/x/net/icmp/mpls.go b/vendor/golang.org/x/net/icmp/mpls.go new file mode 100644 index 00000000..f9f4841b --- /dev/null +++ b/vendor/golang.org/x/net/icmp/mpls.go @@ -0,0 +1,77 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package icmp + +import "encoding/binary" + +// MPLSLabel represents an MPLS label stack entry. +type MPLSLabel struct { + Label int // label value + TC int // traffic class; formerly experimental use + S bool // bottom of stack + TTL int // time to live +} + +const ( + classMPLSLabelStack = 1 + typeIncomingMPLSLabelStack = 1 +) + +// MPLSLabelStack represents an MPLS label stack. +type MPLSLabelStack struct { + Class int // extension object class number + Type int // extension object sub-type + Labels []MPLSLabel +} + +// Len implements the Len method of Extension interface. +func (ls *MPLSLabelStack) Len(proto int) int { + return 4 + (4 * len(ls.Labels)) +} + +// Marshal implements the Marshal method of Extension interface. 
+func (ls *MPLSLabelStack) Marshal(proto int) ([]byte, error) { + b := make([]byte, ls.Len(proto)) + if err := ls.marshal(proto, b); err != nil { + return nil, err + } + return b, nil +} + +func (ls *MPLSLabelStack) marshal(proto int, b []byte) error { + l := ls.Len(proto) + binary.BigEndian.PutUint16(b[:2], uint16(l)) + b[2], b[3] = classMPLSLabelStack, typeIncomingMPLSLabelStack + off := 4 + for _, ll := range ls.Labels { + b[off], b[off+1], b[off+2] = byte(ll.Label>>12), byte(ll.Label>>4&0xff), byte(ll.Label<<4&0xf0) + b[off+2] |= byte(ll.TC << 1 & 0x0e) + if ll.S { + b[off+2] |= 0x1 + } + b[off+3] = byte(ll.TTL) + off += 4 + } + return nil +} + +func parseMPLSLabelStack(b []byte) (Extension, error) { + ls := &MPLSLabelStack{ + Class: int(b[2]), + Type: int(b[3]), + } + for b = b[4:]; len(b) >= 4; b = b[4:] { + ll := MPLSLabel{ + Label: int(b[0])<<12 | int(b[1])<<4 | int(b[2])>>4, + TC: int(b[2]&0x0e) >> 1, + TTL: int(b[3]), + } + if b[2]&0x1 != 0 { + ll.S = true + } + ls.Labels = append(ls.Labels, ll) + } + return ls, nil +} diff --git a/vendor/golang.org/x/net/icmp/multipart.go b/vendor/golang.org/x/net/icmp/multipart.go new file mode 100644 index 00000000..c7b72bf3 --- /dev/null +++ b/vendor/golang.org/x/net/icmp/multipart.go @@ -0,0 +1,129 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package icmp + +import "golang.org/x/net/internal/iana" + +// multipartMessageBodyDataLen takes b as an original datagram and +// exts as extensions, and returns a required length for message body +// and a required length for a padded original datagram in wire +// format. 
+func multipartMessageBodyDataLen(proto int, withOrigDgram bool, b []byte, exts []Extension) (bodyLen, dataLen int) { + bodyLen = 4 // length of leading octets + var extLen int + var rawExt bool // raw extension may contain an empty object + for _, ext := range exts { + extLen += ext.Len(proto) + if _, ok := ext.(*RawExtension); ok { + rawExt = true + } + } + if extLen > 0 && withOrigDgram { + dataLen = multipartMessageOrigDatagramLen(proto, b) + } else { + dataLen = len(b) + } + if extLen > 0 || rawExt { + bodyLen += 4 // length of extension header + } + bodyLen += dataLen + extLen + return bodyLen, dataLen +} + +// multipartMessageOrigDatagramLen takes b as an original datagram, +// and returns a required length for a padded original datagram in wire +// format. +func multipartMessageOrigDatagramLen(proto int, b []byte) int { + roundup := func(b []byte, align int) int { + // According to RFC 4884, the padded original datagram + // field must contain at least 128 octets. + if len(b) < 128 { + return 128 + } + r := len(b) + return (r + align - 1) &^ (align - 1) + } + switch proto { + case iana.ProtocolICMP: + return roundup(b, 4) + case iana.ProtocolIPv6ICMP: + return roundup(b, 8) + default: + return len(b) + } +} + +// marshalMultipartMessageBody takes data as an original datagram and +// exts as extesnsions, and returns a binary encoding of message body. +// It can be used for non-multipart message bodies when exts is nil. 
+func marshalMultipartMessageBody(proto int, withOrigDgram bool, data []byte, exts []Extension) ([]byte, error) { + bodyLen, dataLen := multipartMessageBodyDataLen(proto, withOrigDgram, data, exts) + b := make([]byte, bodyLen) + copy(b[4:], data) + if len(exts) > 0 { + b[4+dataLen] = byte(extensionVersion << 4) + off := 4 + dataLen + 4 // leading octets, data, extension header + for _, ext := range exts { + switch ext := ext.(type) { + case *MPLSLabelStack: + if err := ext.marshal(proto, b[off:]); err != nil { + return nil, err + } + off += ext.Len(proto) + case *InterfaceInfo: + attrs, l := ext.attrsAndLen(proto) + if err := ext.marshal(proto, b[off:], attrs, l); err != nil { + return nil, err + } + off += ext.Len(proto) + case *InterfaceIdent: + if err := ext.marshal(proto, b[off:]); err != nil { + return nil, err + } + off += ext.Len(proto) + case *RawExtension: + copy(b[off:], ext.Data) + off += ext.Len(proto) + } + } + s := checksum(b[4+dataLen:]) + b[4+dataLen+2] ^= byte(s) + b[4+dataLen+3] ^= byte(s >> 8) + if withOrigDgram { + switch proto { + case iana.ProtocolICMP: + b[1] = byte(dataLen / 4) + case iana.ProtocolIPv6ICMP: + b[0] = byte(dataLen / 8) + } + } + } + return b, nil +} + +// parseMultipartMessageBody parses b as either a non-multipart +// message body or a multipart message body. 
+func parseMultipartMessageBody(proto int, typ Type, b []byte) ([]byte, []Extension, error) { + var l int + switch proto { + case iana.ProtocolICMP: + l = 4 * int(b[1]) + case iana.ProtocolIPv6ICMP: + l = 8 * int(b[0]) + } + if len(b) == 4 { + return nil, nil, nil + } + exts, l, err := parseExtensions(typ, b[4:], l) + if err != nil { + l = len(b) - 4 + } + var data []byte + if l > 0 { + data = make([]byte, l) + copy(data, b[4:]) + } + return data, exts, nil +} diff --git a/vendor/golang.org/x/net/icmp/packettoobig.go b/vendor/golang.org/x/net/icmp/packettoobig.go new file mode 100644 index 00000000..afbf24f1 --- /dev/null +++ b/vendor/golang.org/x/net/icmp/packettoobig.go @@ -0,0 +1,43 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package icmp + +import "encoding/binary" + +// A PacketTooBig represents an ICMP packet too big message body. +type PacketTooBig struct { + MTU int // maximum transmission unit of the nexthop link + Data []byte // data, known as original datagram field +} + +// Len implements the Len method of MessageBody interface. +func (p *PacketTooBig) Len(proto int) int { + if p == nil { + return 0 + } + return 4 + len(p.Data) +} + +// Marshal implements the Marshal method of MessageBody interface. +func (p *PacketTooBig) Marshal(proto int) ([]byte, error) { + b := make([]byte, 4+len(p.Data)) + binary.BigEndian.PutUint32(b[:4], uint32(p.MTU)) + copy(b[4:], p.Data) + return b, nil +} + +// parsePacketTooBig parses b as an ICMP packet too big message body. 
+func parsePacketTooBig(proto int, _ Type, b []byte) (MessageBody, error) { + bodyLen := len(b) + if bodyLen < 4 { + return nil, errMessageTooShort + } + p := &PacketTooBig{MTU: int(binary.BigEndian.Uint32(b[:4]))} + if bodyLen > 4 { + p.Data = make([]byte, bodyLen-4) + copy(p.Data, b[4:]) + } + return p, nil +} diff --git a/vendor/golang.org/x/net/icmp/paramprob.go b/vendor/golang.org/x/net/icmp/paramprob.go new file mode 100644 index 00000000..f16fd33e --- /dev/null +++ b/vendor/golang.org/x/net/icmp/paramprob.go @@ -0,0 +1,72 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package icmp + +import ( + "encoding/binary" + + "golang.org/x/net/internal/iana" + "golang.org/x/net/ipv4" +) + +// A ParamProb represents an ICMP parameter problem message body. +type ParamProb struct { + Pointer uintptr // offset within the data where the error was detected + Data []byte // data, known as original datagram field + Extensions []Extension // extensions +} + +// Len implements the Len method of MessageBody interface. +func (p *ParamProb) Len(proto int) int { + if p == nil { + return 0 + } + l, _ := multipartMessageBodyDataLen(proto, true, p.Data, p.Extensions) + return l +} + +// Marshal implements the Marshal method of MessageBody interface. 
+func (p *ParamProb) Marshal(proto int) ([]byte, error) { + switch proto { + case iana.ProtocolICMP: + if !validExtensions(ipv4.ICMPTypeParameterProblem, p.Extensions) { + return nil, errInvalidExtension + } + b, err := marshalMultipartMessageBody(proto, true, p.Data, p.Extensions) + if err != nil { + return nil, err + } + b[0] = byte(p.Pointer) + return b, nil + case iana.ProtocolIPv6ICMP: + b := make([]byte, p.Len(proto)) + binary.BigEndian.PutUint32(b[:4], uint32(p.Pointer)) + copy(b[4:], p.Data) + return b, nil + default: + return nil, errInvalidProtocol + } +} + +// parseParamProb parses b as an ICMP parameter problem message body. +func parseParamProb(proto int, typ Type, b []byte) (MessageBody, error) { + if len(b) < 4 { + return nil, errMessageTooShort + } + p := &ParamProb{} + if proto == iana.ProtocolIPv6ICMP { + p.Pointer = uintptr(binary.BigEndian.Uint32(b[:4])) + p.Data = make([]byte, len(b)-4) + copy(p.Data, b[4:]) + return p, nil + } + p.Pointer = uintptr(b[0]) + var err error + p.Data, p.Extensions, err = parseMultipartMessageBody(proto, typ, b) + if err != nil { + return nil, err + } + return p, nil +} diff --git a/vendor/golang.org/x/net/icmp/sys_freebsd.go b/vendor/golang.org/x/net/icmp/sys_freebsd.go new file mode 100644 index 00000000..c75f3dda --- /dev/null +++ b/vendor/golang.org/x/net/icmp/sys_freebsd.go @@ -0,0 +1,11 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package icmp + +import "syscall" + +func init() { + freebsdVersion, _ = syscall.SysctlUint32("kern.osreldate") +} diff --git a/vendor/golang.org/x/net/icmp/timeexceeded.go b/vendor/golang.org/x/net/icmp/timeexceeded.go new file mode 100644 index 00000000..ffa986fd --- /dev/null +++ b/vendor/golang.org/x/net/icmp/timeexceeded.go @@ -0,0 +1,57 @@ +// Copyright 2014 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package icmp + +import ( + "golang.org/x/net/internal/iana" + "golang.org/x/net/ipv4" + "golang.org/x/net/ipv6" +) + +// A TimeExceeded represents an ICMP time exceeded message body. +type TimeExceeded struct { + Data []byte // data, known as original datagram field + Extensions []Extension // extensions +} + +// Len implements the Len method of MessageBody interface. +func (p *TimeExceeded) Len(proto int) int { + if p == nil { + return 0 + } + l, _ := multipartMessageBodyDataLen(proto, true, p.Data, p.Extensions) + return l +} + +// Marshal implements the Marshal method of MessageBody interface. +func (p *TimeExceeded) Marshal(proto int) ([]byte, error) { + var typ Type + switch proto { + case iana.ProtocolICMP: + typ = ipv4.ICMPTypeTimeExceeded + case iana.ProtocolIPv6ICMP: + typ = ipv6.ICMPTypeTimeExceeded + default: + return nil, errInvalidProtocol + } + if !validExtensions(typ, p.Extensions) { + return nil, errInvalidExtension + } + return marshalMultipartMessageBody(proto, true, p.Data, p.Extensions) +} + +// parseTimeExceeded parses b as an ICMP time exceeded message body. 
+func parseTimeExceeded(proto int, typ Type, b []byte) (MessageBody, error) { + if len(b) < 4 { + return nil, errMessageTooShort + } + p := &TimeExceeded{} + var err error + p.Data, p.Extensions, err = parseMultipartMessageBody(proto, typ, b) + if err != nil { + return nil, err + } + return p, nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 7bc981f0..96f72aac 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -175,6 +175,7 @@ golang.org/x/net/bpf golang.org/x/net/html golang.org/x/net/html/atom golang.org/x/net/html/charset +golang.org/x/net/icmp golang.org/x/net/internal/iana golang.org/x/net/internal/socket golang.org/x/net/ipv4 From 35b65b6b66deec82338d7efb52d9fd2ceaffa307 Mon Sep 17 00:00:00 2001 From: Yevhen Vydolob Date: Thu, 19 Feb 2026 15:18:21 +0200 Subject: [PATCH 5/7] feat: implement getExpectedReplyIdent for ICMP echo identifier handling - Added getExpectedReplyIdent function to handle ICMP echo identifier validation for Linux and Windows. - Updated handlePingRequest to use the new function for validating echo replies. - Add ping tests for qemu test suite Signed-off-by: Yevhen Vydolob --- pkg/services/forwarder/icmp_packet.go | 25 +++++----------- pkg/services/forwarder/icmp_packet_unix.go | 21 ++++++++++++++ pkg/services/forwarder/icmp_packet_windows.go | 6 ++++ test-qemu/basic_test.go | 29 +++++++++++++++++++ 4 files changed, 63 insertions(+), 18 deletions(-) diff --git a/pkg/services/forwarder/icmp_packet.go b/pkg/services/forwarder/icmp_packet.go index ef2620c1..aed3c155 100644 --- a/pkg/services/forwarder/icmp_packet.go +++ b/pkg/services/forwarder/icmp_packet.go @@ -23,19 +23,6 @@ type echoRequestDetails struct { dataBuf buffer.Buffer } -// safeUint16 safely converts an int to uint16, clamping to valid range. -// ICMP ID and sequence numbers are 16-bit values, so values outside this range -// are invalid and will be clamped. 
-func safeUint16(v int) uint16 { - if v < 0 { - return 0 - } - if v > 0xFFFF { - return 0xFFFF - } - return uint16(v) -} - // handlePingRequest handles forwarding an ICMP echo request (PING) from the VM // to the external network and injecting the reply back into the VM. func handlePingRequest(s *stack.Stack, r *ICMPForwarderRequest, destAddr tcpip.Address, icmpHeader header.ICMPv4, pkt *stack.PacketBuffer) { @@ -66,14 +53,16 @@ func handlePingRequest(s *stack.Stack, r *ICMPForwarderRequest, destAddr tcpip.A return } - // Validate the reply matches our request - if !validateEchoReply(echoReply, details.ident, details.seq) { + // Validate the reply matches our request (on Linux, kernel uses socket port as echo ID) + expectedIdent := getExpectedReplyIdent(conn, details.ident) + if !validateEchoReply(echoReply, expectedIdent, details.seq) { return } - // Forward the reply back to the VM's network stack - // Safely convert int to uint16 (ICMP ID and Seq are 16-bit values) - forwardEchoReply(s, r, details.srcAddr, destAddr, safeUint16(echoReply.ID), safeUint16(echoReply.Seq), echoReply.Data) + // Forward the reply back to the VM's network stack. Use the VM's original + // ident/seq so the VM's ping process can match the reply to its request + // (on Linux the host reply has kernel-assigned ID; we must rewrite to VM's). 
+ forwardEchoReply(s, r, details.srcAddr, destAddr, details.ident, details.seq, echoReply.Data) } // extractEchoRequestDetails extracts the identifier, sequence, payload, and source address diff --git a/pkg/services/forwarder/icmp_packet_unix.go b/pkg/services/forwarder/icmp_packet_unix.go index d42a5091..1ce28ec5 100644 --- a/pkg/services/forwarder/icmp_packet_unix.go +++ b/pkg/services/forwarder/icmp_packet_unix.go @@ -4,6 +4,7 @@ package forwarder import ( "net" + "runtime" "time" log "github.com/sirupsen/logrus" @@ -40,3 +41,23 @@ func extractICMPData(replyBytes []byte) ([]byte, error) { // Linux/macOS unprivileged sockets return just the ICMP data return replyBytes, nil } + +// getExpectedReplyIdent returns the ICMP echo identifier to expect in the reply. +// On Linux, the kernel overwrites the echo ID with the socket's local port for +// unprivileged ICMP sockets, so we must use that for validation. On macOS the +// kernel preserves the ID we send. +func getExpectedReplyIdent(conn *netIcmp.PacketConn, sentIdent uint16) uint16 { + if runtime.GOOS != "linux" { + return sentIdent + } + addr := conn.LocalAddr() + udpAddr, ok := addr.(*net.UDPAddr) + if !ok || udpAddr == nil { + return sentIdent + } + port := udpAddr.Port + if port < 0 || port > 0xFFFF { + return sentIdent + } + return uint16(port) +} diff --git a/pkg/services/forwarder/icmp_packet_windows.go b/pkg/services/forwarder/icmp_packet_windows.go index 749441e8..a79b6237 100644 --- a/pkg/services/forwarder/icmp_packet_windows.go +++ b/pkg/services/forwarder/icmp_packet_windows.go @@ -60,3 +60,9 @@ func extractICMPData(replyBytes []byte) ([]byte, error) { return replyBytes[ihl:], nil } + +// getExpectedReplyIdent returns the ICMP echo identifier to expect in the reply. +// On Windows (raw sockets) the kernel preserves the ID we send. 
+func getExpectedReplyIdent(conn *netIcmp.PacketConn, sentIdent uint16) uint16 { + return sentIdent +} diff --git a/test-qemu/basic_test.go b/test-qemu/basic_test.go index 1fdefc5b..d9a9af85 100644 --- a/test-qemu/basic_test.go +++ b/test-qemu/basic_test.go @@ -5,6 +5,7 @@ import ( e2e "github.com/containers/gvisor-tap-vsock/test" "github.com/onsi/ginkgo" "github.com/onsi/gomega" + log "github.com/sirupsen/logrus" ) var _ = ginkgo.Describe("connectivity with qemu", func() { @@ -52,3 +53,31 @@ var _ = ginkgo.Describe("command-line format", func() { })) }) }) + +var _ = ginkgo.Describe("ping with gvproxy", func() { + ginkgo.It("should succeed to ping a known domain", func() { + out, err := sshExec("ping -w2 crc.dev") + log.Infof("ping: %s", out) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should fail to ping an unknown domain", func() { + out, err := sshExec("ping -w2 unknown.crc.dev") + log.Infof("ping: %s", out) + gomega.Expect(err).To(gomega.HaveOccurred()) + }) + ginkgo.It("should succeed to ping a known IP", func() { + out, err := sshExec("ping -w2 1.1.1.1") + log.Infof("ping: %s", out) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should fail to ping an unknown IP", func() { + out, err := sshExec("ping -w2 7.7.7.7") + log.Infof("ping: %s", out) + gomega.Expect(err).To(gomega.HaveOccurred()) + }) + ginkgo.It("should succeed to ping an localhost", func() { + out, err := sshExec("ping -w2 127.0.0.1") + log.Infof("ping: %s", out) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) +}) From 05d6cec0953e5b8ae3e4fc3479c312270659fd2c Mon Sep 17 00:00:00 2001 From: Yevhen Vydolob Date: Tue, 24 Feb 2026 09:56:09 +0200 Subject: [PATCH 6/7] Disable ping tests on CI As Azure and by extension Github Actions are not allowing to ping external domains, we need to disable the ping tests for now. 
Signed-off-by: Yevhen Vydolob --- test-qemu/basic_test.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test-qemu/basic_test.go b/test-qemu/basic_test.go index d9a9af85..baa90747 100644 --- a/test-qemu/basic_test.go +++ b/test-qemu/basic_test.go @@ -56,20 +56,23 @@ var _ = ginkgo.Describe("command-line format", func() { var _ = ginkgo.Describe("ping with gvproxy", func() { ginkgo.It("should succeed to ping a known domain", func() { + ginkgo.Skip("this test is not run in CI, because on Azure ICMP is blocked by default") out, err := sshExec("ping -w2 crc.dev") log.Infof("ping: %s", out) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + ginkgo.It("should succeed to ping a known IP", func() { + ginkgo.Skip("this test is not run in CI, because on Azure ICMP is blocked by default") + out, err := sshExec("ping -w2 1.1.1.1") + log.Infof("ping: %s", out) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should fail to ping an unknown domain", func() { out, err := sshExec("ping -w2 unknown.crc.dev") log.Infof("ping: %s", out) gomega.Expect(err).To(gomega.HaveOccurred()) }) - ginkgo.It("should succeed to ping a known IP", func() { - out, err := sshExec("ping -w2 1.1.1.1") - log.Infof("ping: %s", out) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) ginkgo.It("should fail to ping an unknown IP", func() { out, err := sshExec("ping -w2 7.7.7.7") log.Infof("ping: %s", out) From c4ab5ba557ec4a481c1ecbf1ce6e54cbdbeaf6bc Mon Sep 17 00:00:00 2001 From: Yevhen Vydolob Date: Mon, 6 Apr 2026 10:10:08 +0300 Subject: [PATCH 7/7] fix handling icmp response on windows Signed-off-by: Yevhen Vydolob --- pkg/services/forwarder/icmp_packet_windows.go | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/pkg/services/forwarder/icmp_packet_windows.go b/pkg/services/forwarder/icmp_packet_windows.go index a79b6237..0ca94027 100644 --- a/pkg/services/forwarder/icmp_packet_windows.go +++ 
b/pkg/services/forwarder/icmp_packet_windows.go @@ -36,33 +36,29 @@ func createDestinationAddr(dstIP net.IP) net.Addr { } // extractICMPData extracts ICMP data from the received bytes. -// On Windows with raw sockets, it skips the IP header. +// Some Windows versions return IPv4 with the IP header; others return ICMP only (e.g. echo +// reply type 0 makes the first byte 0x00, which is not IPv4 version 4). If the buffer +// starts with an IPv4 header, strip it; otherwise return the payload as-is. func extractICMPData(replyBytes []byte) ([]byte, error) { - // Raw sockets on Windows include the IP header, so we need to skip it - if len(replyBytes) < 20 { - log.Debugf("Reply packet too short: %d bytes", len(replyBytes)) - return nil, fmt.Errorf("reply packet too short: %d bytes", len(replyBytes)) + if len(replyBytes) == 0 { + return nil, fmt.Errorf("reply packet empty") } - - // Check if it's IPv4 (first byte: version and IHL) version := (replyBytes[0] >> 4) & 0x0F if version != 4 { - log.Debugf("Unexpected IP version: %d", version) - return nil, fmt.Errorf("unexpected IP version: %d", version) + return replyBytes, nil + } + if len(replyBytes) < 20 { + return nil, fmt.Errorf("reply packet too short for IPv4: %d bytes", len(replyBytes)) } - - // Get IP header length (IHL is in the lower 4 bits of first byte, in 4-byte units) ihl := int(replyBytes[0]&0x0F) * 4 if ihl < 20 || ihl > len(replyBytes) { - log.Debugf("Invalid IP header length: %d", ihl) return nil, fmt.Errorf("invalid IP header length: %d", ihl) } - return replyBytes[ihl:], nil } // getExpectedReplyIdent returns the ICMP echo identifier to expect in the reply. // On Windows (raw sockets) the kernel preserves the ID we send. -func getExpectedReplyIdent(conn *netIcmp.PacketConn, sentIdent uint16) uint16 { +func getExpectedReplyIdent(_ *netIcmp.PacketConn, sentIdent uint16) uint16 { return sentIdent }