Files
Yutaro Hayakawa 6e61cd407d Add a basic support for nexthop
Add basic support for an equivalent of Linux's `ip nexthop`. In this PR, I
specifically focused on implementing the minimal feature set needed to
install an IPv4 prefix with an IPv6 (link-local) nexthop, which is used by
various implementations such as FRR to support a technique called BGP
Unnumbered.

A summary of the new features:

- Introduce a low level primitive for nexthop in the nl package
- Introduce NexthopAdd/Del/List/Replace APIs (supports
  NHA_ID/BLACKHOLE/GATEWAY, and protocol field)
- Introduce NHID field to the Route object which allows attaching
  nexthop to routes.

Signed-off-by: Yutaro Hayakawa <yutaro.hayakawa@isovalent.com>

=== Squashed Commits ===

nl: Fix some wrong error and done message handling

The current logic for parsing ERROR and DONE messages first reads the
error field and then, when NLM_F_ACK_TLVS is set, tries to read the
original request header, the payload of the request, and the extended ACK.

We have three issues here:

1. The presence of the original request header is not indicated by the
   NLM_F_ACK_TLVS flag. The original request header, at least, is always
   present.
2. We are missing a check for the NLM_F_CAPPED flag. When that flag is set,
   the payload of the request is not present, so we shouldn't try to skip
   it. Otherwise, we may end up with an out-of-range read.
3. NLMSG_DONE doesn't contain the original request, so we shouldn't
   apply the original-request parsing logic to it.

In this commit, we fix these issues by:

1. First check whether NLM_F_CAPPED is set. When it is, skip only the
   original request header. Otherwise, skip the payload as well. Don't
   apply this logic to the DONE message.
2. After that, check whether NLM_F_ACK_TLVS is set. When it is, try to
   read the extended ACK for both DONE and ERROR messages. Otherwise,
   don't.

Ref: https://docs.kernel.org/userspace-api/netlink/intro.html#netlink-message-types

Signed-off-by: Yutaro Hayakawa <yutaro.hayakawa@isovalent.com>

nexthop: Add a low-level API for the nexthop

Preparation for the support of the nexthop object.

Signed-off-by: Yutaro Hayakawa <yutaro.hayakawa@isovalent.com>

Add basic operation for nexthop

Add basic support for Linux's nexthop object (ip nexthop XXX). This
commit aims to introduce basic operations (add, list, del) with
minimal attributes. Further features can be added incrementally later.

Signed-off-by: Yutaro Hayakawa <yutaro.hayakawa@isovalent.com>

nexthop: Support NHA_OIF

It can be used to express a direct nexthop on a specific link.

Signed-off-by: Yutaro Hayakawa <yutaro.hayakawa@isovalent.com>

nexthop: Support NHA_GATEWAY

It can express an IP nexthop. A unique use case this enables is attaching
an IPv6 nexthop to routes with an IPv4 prefix, which we cannot do with the
existing `ip route` equivalents.

Signed-off-by: Yutaro Hayakawa <yutaro.hayakawa@isovalent.com>

nexthop: Support protocol

Allow setting protocol for nexthop.

Signed-off-by: Yutaro Hayakawa <yutaro.hayakawa@isovalent.com>

route: Support RTA_NHID

Support attaching nexthop object to route object via NHID field.

Signed-off-by: Yutaro Hayakawa <yutaro.hayakawa@isovalent.com>

nexthop: Add Replace operation support

Add `ip nexthop replace` equivalent

Signed-off-by: Yutaro Hayakawa <yutaro.hayakawa@isovalent.com>
2025-10-31 23:37:11 -07:00

258 lines
6.1 KiB
Go

package netlink
import (
"fmt"
"net"
"strings"
)
// Scope is an enum representing a route scope.
// It is a single-byte (uint8) value and is the type of the Route.Scope field.
type Scope uint8
// NextHopFlag is a bit flag that Route.SetFlag ORs into, and Route.ClearFlag
// masks out of, the Route.Flags field.
type NextHopFlag int
// RT_FILTER_* are single-bit uint64 values, one per route attribute.
// RT_FILTER_PROTOCOL is 1<<1 and every following constant takes the next
// higher bit (bit 0 is left unused), so several of them can be OR-ed together
// into one mask. Reordering the list changes the numeric values.
// NOTE(review): presumably consumed as the filter mask of the route listing
// APIs, which are not visible in this file — confirm.
const (
RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota)
RT_FILTER_SCOPE
RT_FILTER_TYPE
RT_FILTER_TOS
RT_FILTER_IIF
RT_FILTER_OIF
RT_FILTER_DST
RT_FILTER_SRC
RT_FILTER_GW
RT_FILTER_TABLE
RT_FILTER_HOPLIMIT
RT_FILTER_PRIORITY
RT_FILTER_MARK
RT_FILTER_MASK
RT_FILTER_REALM
)
// Destination is the interface satisfied by the values held in the NewDst and
// Via fields of Route and NexthopInfo.
type Destination interface {
// Family returns an integer identifying the destination's family.
// NOTE(review): presumably an address-family (AF_*) constant — confirm
// against the implementations.
Family() int
// Decode takes a byte slice and may fail.
// NOTE(review): presumably it parses a raw netlink attribute payload into
// the receiver — confirm against the implementations.
Decode([]byte) error
// Encode returns a byte-slice form of the destination, or an error.
Encode() ([]byte, error)
// String returns a textual form; Route.String and NexthopInfo.String print
// it with the %s verb.
String() string
// Equal reports whether the receiver matches the given Destination.
// Route.Equal and NexthopInfo.Equal may pass a nil argument.
Equal(Destination) bool
}
// Encap is the interface satisfied by the values held in the Encap field of
// Route and NexthopInfo.
type Encap interface {
// Type returns an integer identifying the kind of encapsulation.
// NOTE(review): presumably a LWTUNNEL_ENCAP_* constant — confirm against
// the implementations.
Type() int
// Decode takes a byte slice and may fail.
// NOTE(review): presumably it parses a raw netlink attribute payload into
// the receiver — confirm against the implementations.
Decode([]byte) error
// Encode returns a byte-slice form of the encapsulation, or an error.
Encode() ([]byte, error)
// String returns a textual form; Route.String and NexthopInfo.String print
// it with the %s verb.
String() string
// Equal reports whether the receiver matches the given Encap.
// Route.Equal and NexthopInfo.Equal may pass a nil argument.
Equal(Encap) bool
}
// RouteCacheInfo is the type pointed to by Route.CacheInfo. All fields are
// unsigned 32-bit counters except Expires, which is signed.
// NOTE(review): the field set and order look like the kernel's
// struct rta_cacheinfo; units and exact meaning are not visible here — confirm
// before relying on them.
type RouteCacheInfo struct {
Users uint32
Age uint32
Expires int32
Error uint32
Used uint32
Id uint32
Ts uint32
Tsage uint32
}
// RouteProtocol describes the originator of a route. It is the type of the
// Route.Protocol field.
type RouteProtocol int
// Route represents a netlink route.
//
// Route.Equal compares LinkIndex, ILinkIndex, Scope, Dst, Src, Gw, MultiPath,
// Protocol, Priority, Realm, Table, Type, Tos, Hoplimit, Flags, MPLSDst,
// NewDst, Via, Encap and NHID; every other field is ignored by it.
type Route struct {
// LinkIndex is printed as "Ifindex" by String, and only when MultiPath is
// empty.
LinkIndex int
ILinkIndex int
Scope Scope
// Dst is printed by String unless MPLSDst is non-nil.
Dst *net.IPNet
Src net.IP
// Gw is printed by String only when MultiPath is empty; otherwise the
// MultiPath entries are printed under the same "Gw" label.
Gw net.IP
MultiPath []*NexthopInfo
Protocol RouteProtocol
Priority int
Family int
Table int
Type int
Tos int
// Flags is the bit set modified by SetFlag and ClearFlag.
Flags int
// MPLSDst, when non-nil, replaces Dst in the String output. Equal treats two
// routes as matching on this field when both pointers are identical or both
// point to equal values.
MPLSDst *int
NewDst Destination
Encap Encap
Via Destination
Realm int
// NOTE(review): the fields from MTU through FastOpenNoCookie look like
// per-route metrics; neither String nor Equal reads them — confirm their
// semantics against the platform-specific code.
MTU int
MTULock bool
Window int
Rtt int
RttVar int
Ssthresh int
Cwnd int
AdvMSS int
Reordering int
Hoplimit int
InitCwnd int
Features int
RtoMin int
RtoMinLock bool
InitRwnd int
QuickACK int
Congctl string
FastOpenNoCookie int
// Expires is printed by String with a "sec" suffix, and only when non-zero.
Expires int
CacheInfo *RouteCacheInfo
// NHID attaches a nexthop object to the route by ID. String prints it only
// when it is non-zero.
NHID uint32
}
// String renders the route on a single line as "{Key: value Key: value ...}"
// for logging and debugging.
//
// Elements appear in this order:
//   - Ifindex: only when the route has no MultiPath entries.
//   - Dst: the value MPLSDst points to when MPLSDst is non-nil, otherwise Dst.
//   - NewDst, Encap, Via: only when the respective field is non-nil.
//   - Src: always.
//   - Gw: the MultiPath entries when there are any, otherwise Gw.
//   - Flags (from ListFlags), Table, Realm: always.
//   - Expires: only when non-zero, suffixed with "sec".
//   - NHID: only when non-zero.
func (r Route) String() string {
	// At most twelve elements are ever emitted.
	elems := make([]string, 0, 12)
	if len(r.MultiPath) == 0 {
		elems = append(elems, fmt.Sprintf("Ifindex: %d", r.LinkIndex))
	}
	if r.MPLSDst != nil {
		// Dereference the pointer: formatting the *int itself with %d prints
		// the pointer's address instead of the value it points to.
		elems = append(elems, fmt.Sprintf("Dst: %d", *r.MPLSDst))
	} else {
		elems = append(elems, fmt.Sprintf("Dst: %s", r.Dst))
	}
	if r.NewDst != nil {
		elems = append(elems, fmt.Sprintf("NewDst: %s", r.NewDst))
	}
	if r.Encap != nil {
		elems = append(elems, fmt.Sprintf("Encap: %s", r.Encap))
	}
	if r.Via != nil {
		elems = append(elems, fmt.Sprintf("Via: %s", r.Via))
	}
	elems = append(elems, fmt.Sprintf("Src: %s", r.Src))
	if len(r.MultiPath) > 0 {
		elems = append(elems, fmt.Sprintf("Gw: %s", r.MultiPath))
	} else {
		elems = append(elems, fmt.Sprintf("Gw: %s", r.Gw))
	}
	elems = append(elems, fmt.Sprintf("Flags: %s", r.ListFlags()))
	elems = append(elems, fmt.Sprintf("Table: %d", r.Table))
	elems = append(elems, fmt.Sprintf("Realm: %d", r.Realm))
	if r.Expires != 0 {
		elems = append(elems, fmt.Sprintf("Expires: %dsec", r.Expires))
	}
	if r.NHID != 0 {
		elems = append(elems, fmt.Sprintf("NHID: %d", r.NHID))
	}
	return fmt.Sprintf("{%s}", strings.Join(elems, " "))
}
// Equal reports whether r and x describe the same route.
//
// The comparison covers LinkIndex, ILinkIndex, Scope, Dst, Src, Gw, MultiPath,
// Protocol, Priority, Realm, Table, Type, Tos, Hoplimit, Flags, MPLSDst,
// NewDst, Via, Encap and NHID. All remaining Route fields are ignored.
//
// Pointer and interface fields match when both sides are identical (including
// both nil) or when r's side is non-nil and considers x's side equal.
func (r Route) Equal(x Route) bool {
	// Link indices and scope.
	if r.LinkIndex != x.LinkIndex || r.ILinkIndex != x.ILinkIndex || r.Scope != x.Scope {
		return false
	}

	// Addressing.
	if !ipNetEqual(r.Dst, x.Dst) || !r.Src.Equal(x.Src) || !r.Gw.Equal(x.Gw) {
		return false
	}
	if !nexthopInfoSlice(r.MultiPath).Equal(x.MultiPath) {
		return false
	}

	// Scalar attributes.
	if r.Protocol != x.Protocol || r.Priority != x.Priority || r.Realm != x.Realm {
		return false
	}
	if r.Table != x.Table || r.Type != x.Type || r.Tos != x.Tos {
		return false
	}
	if r.Hoplimit != x.Hoplimit || r.Flags != x.Flags {
		return false
	}

	// Optional MPLS destination: same pointer, or both set to the same value.
	if r.MPLSDst != x.MPLSDst {
		if r.MPLSDst == nil || x.MPLSDst == nil || *r.MPLSDst != *x.MPLSDst {
			return false
		}
	}

	// Optional interface-typed attributes.
	if r.NewDst != x.NewDst && (r.NewDst == nil || !r.NewDst.Equal(x.NewDst)) {
		return false
	}
	if r.Via != x.Via && (r.Via == nil || !r.Via.Equal(x.Via)) {
		return false
	}
	if r.Encap != x.Encap && (r.Encap == nil || !r.Encap.Equal(x.Encap)) {
		return false
	}

	return r.NHID == x.NHID
}
// SetFlag turns on the bits of flag in r.Flags and leaves all other bits as
// they were.
func (r *Route) SetFlag(flag NextHopFlag) {
	bits := int(flag)
	r.Flags = r.Flags | bits
}
// ClearFlag turns off the bits of flag in r.Flags and leaves all other bits as
// they were.
func (r *Route) ClearFlag(flag NextHopFlag) {
	bits := int(flag)
	r.Flags = r.Flags &^ bits
}
// flagString pairs a NextHopFlag with a string.
// NOTE(review): presumably the table entry type used to turn flag bits into
// the names returned by ListFlags; no use of it is visible in this file —
// confirm.
type flagString struct {
f NextHopFlag
s string
}
// RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE
// NlFlags is only non-zero for RTM_NEWROUTE, the following flags can be set:
// - unix.NLM_F_REPLACE - Replace existing matching config object with this request
// - unix.NLM_F_EXCL - Don't replace the config object if it already exists
// - unix.NLM_F_CREATE - Create config object if it doesn't already exist
// - unix.NLM_F_APPEND - Add to the end of the object list
//
// Route is embedded, so its fields and methods are promoted onto RouteUpdate.
type RouteUpdate struct {
// Type is the message type (RTM_NEWROUTE or RTM_DELROUTE).
Type uint16
// NlFlags carries the NLM_F_* flags listed above.
NlFlags uint16
Route
}
// NexthopInfo describes one entry of Route.MultiPath.
type NexthopInfo struct {
// LinkIndex is printed as "Ifindex" by String.
LinkIndex int
// Hops is printed by String as "Weight", with a value of Hops+1.
Hops int
Gw net.IP
Flags int
NewDst Destination
Encap Encap
Via Destination
}
// String renders the nexthop on a single line as "{Key: value Key: value ...}".
//
// Ifindex comes first, followed by NewDst, Encap and Via (each only when
// non-nil), then Weight (Hops+1), Gw and Flags (from ListFlags).
func (n *NexthopInfo) String() string {
	parts := make([]string, 0, 7)
	parts = append(parts, fmt.Sprintf("Ifindex: %d", n.LinkIndex))

	// Optional attributes are listed only when present.
	if n.NewDst != nil {
		parts = append(parts, fmt.Sprintf("NewDst: %s", n.NewDst))
	}
	if n.Encap != nil {
		parts = append(parts, fmt.Sprintf("Encap: %s", n.Encap))
	}
	if n.Via != nil {
		parts = append(parts, fmt.Sprintf("Via: %s", n.Via))
	}

	parts = append(parts,
		fmt.Sprintf("Weight: %d", n.Hops+1),
		fmt.Sprintf("Gw: %s", n.Gw),
		fmt.Sprintf("Flags: %s", n.ListFlags()),
	)
	return "{" + strings.Join(parts, " ") + "}"
}
// Equal reports whether n and x describe the same nexthop.
//
// LinkIndex, Hops and Flags are compared by value and Gw with net.IP.Equal.
// NewDst, Encap and Via each match when the two interface values are identical
// (which includes both being nil) or when n's value is non-nil and its own
// Equal method accepts x's value.
//
// Via takes part in the comparison, as it does in Route.Equal; leaving it out
// would report two nexthops that differ only in their Via destination as
// equal.
func (n NexthopInfo) Equal(x NexthopInfo) bool {
	return n.LinkIndex == x.LinkIndex &&
		n.Hops == x.Hops &&
		n.Gw.Equal(x.Gw) &&
		n.Flags == x.Flags &&
		(n.NewDst == x.NewDst || (n.NewDst != nil && n.NewDst.Equal(x.NewDst))) &&
		(n.Encap == x.Encap || (n.Encap != nil && n.Encap.Equal(x.Encap))) &&
		(n.Via == x.Via || (n.Via != nil && n.Via.Equal(x.Via)))
}
// nexthopInfoSlice is a named []*NexthopInfo that carries an Equal method;
// Route.Equal converts Route.MultiPath to it to compare multipath lists.
type nexthopInfoSlice []*NexthopInfo
// Equal reports whether n and x have the same length and hold pairwise-equal
// nexthops at every index, so order matters.
//
// A nil entry on either side makes the slices unequal, even when both sides
// are nil at the same index.
func (n nexthopInfoSlice) Equal(x []*NexthopInfo) bool {
	if len(x) != len(n) {
		return false
	}
	for idx, lhs := range n {
		rhs := x[idx]
		if lhs == nil || rhs == nil || !lhs.Equal(*rhs) {
			return false
		}
	}
	return true
}
// ipNetEqual reports whether two IPNet pointers describe the same network.
//
// Identical pointers (including two nils) are equal; a nil and a non-nil
// pointer are not. Otherwise the networks are equal when their masks have the
// same number of leading ones and their IPs match under net.IP.Equal. Only the
// ones count of each mask is compared, not its total bit length.
func ipNetEqual(ipn1 *net.IPNet, ipn2 *net.IPNet) bool {
	switch {
	case ipn1 == ipn2:
		return true
	case ipn1 == nil, ipn2 == nil:
		return false
	}

	ones1, _ := ipn1.Mask.Size()
	ones2, _ := ipn2.Mask.Size()
	if ones1 != ones2 {
		return false
	}
	return ipn1.IP.Equal(ipn2.IP)
}