summaryrefslogtreecommitdiff
path: root/net/netfilter
diff options
context:
space:
mode:
authorLorenzo Bianconi <lorenzo@kernel.org>2025-11-07 12:14:46 +0100
committerPablo Neira Ayuso <pablo@netfilter.org>2025-11-28 00:00:38 +0000
commitab427db17885814069bae891834f20842f0ac3a4 (patch)
treefd6e62d0fdf6e8398081ef2cd4593053596464f4 /net/netfilter
parenta0d98b641d676e9fc5c458b14aee8ee874dd7298 (diff)
netfilter: flowtable: Add IPIP rx sw acceleration
Introduce sw acceleration for rx path of IPIP tunnels relying on the netfilter flowtable infrastructure. Subsequent patches will add sw acceleration for IPIP tunnels tx path. This series introduces basic infrastructure to accelerate other tunnel types (e.g. IP6IP6). IPIP rx sw acceleration can be tested running the following scenario where the traffic is forwarded between two NICs (eth0 and eth1) and an IPIP tunnel is used to access a remote site (using eth1 as the underlay device): ETH0 -- TUN0 <==> ETH1 -- [IP network] -- TUN1 (192.168.100.2) $ip addr show 6: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether 00:00:22:33:11:55 brd ff:ff:ff:ff:ff:ff inet 192.168.0.2/24 scope global eth0 valid_lft forever preferred_lft forever 7: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether 00:11:22:33:11:55 brd ff:ff:ff:ff:ff:ff inet 192.168.1.1/24 scope global eth1 valid_lft forever preferred_lft forever 8: tun0@NONE: <POINTOPOINT,NOARP,UP,LOWER_UP> mtu 1480 qdisc noqueue state UNKNOWN group default qlen 1000 link/ipip 192.168.1.1 peer 192.168.1.2 inet 192.168.100.1/24 scope global tun0 valid_lft forever preferred_lft forever $ip route show default via 192.168.100.2 dev tun0 192.168.0.0/24 dev eth0 proto kernel scope link src 192.168.0.2 192.168.1.0/24 dev eth1 proto kernel scope link src 192.168.1.1 192.168.100.0/24 dev tun0 proto kernel scope link src 192.168.100.1 $nft list ruleset table inet filter { flowtable ft { hook ingress priority filter devices = { eth0, eth1 } } chain forward { type filter hook forward priority filter; policy accept; meta l4proto { tcp, udp } flow add @ft } } Reproducing the scenario described above using veths I got the following results: - TCP stream received from the IPIP tunnel: - net-next: (baseline) ~ 71Gbps - net-next + IPIP flowtbale support: ~101Gbps Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/nf_flow_table_core.c3
-rw-r--r--net/netfilter/nf_flow_table_ip.c69
-rw-r--r--net/netfilter/nf_flow_table_path.c38
3 files changed, 97 insertions, 13 deletions
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 6c6a5165f993..06e8251a6644 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -118,7 +118,10 @@ static int flow_offload_fill_route(struct flow_offload *flow,
flow_tuple->in_vlan_ingress |= BIT(j);
j++;
}
+
+ flow_tuple->tun = route->tuple[dir].in.tun;
flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
+ flow_tuple->tun_num = route->tuple[dir].in.num_tuns;
switch (route->tuple[dir].xmit_type) {
case FLOW_OFFLOAD_XMIT_DIRECT:
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 083ceb64ac17..d6a1f0cda189 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -145,8 +145,11 @@ static bool ip_has_options(unsigned int thoff)
static void nf_flow_tuple_encap(struct sk_buff *skb,
struct flow_offload_tuple *tuple)
{
+ __be16 inner_proto = skb->protocol;
struct vlan_ethhdr *veth;
struct pppoe_hdr *phdr;
+ struct iphdr *iph;
+ u16 offset = 0;
int i = 0;
if (skb_vlan_tag_present(skb)) {
@@ -159,13 +162,26 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
tuple->encap[i].proto = skb->protocol;
+ inner_proto = veth->h_vlan_encapsulated_proto;
+ offset += VLAN_HLEN;
break;
case htons(ETH_P_PPP_SES):
phdr = (struct pppoe_hdr *)skb_network_header(skb);
tuple->encap[i].id = ntohs(phdr->sid);
tuple->encap[i].proto = skb->protocol;
+ inner_proto = *((__be16 *)(phdr + 1));
+ offset += PPPOE_SES_HLEN;
break;
}
+
+ if (inner_proto == htons(ETH_P_IP)) {
+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ if (iph->protocol == IPPROTO_IPIP) {
+ tuple->tun.dst_v4.s_addr = iph->daddr;
+ tuple->tun.src_v4.s_addr = iph->saddr;
+ tuple->tun.l3_proto = IPPROTO_IPIP;
+ }
+ }
}
struct nf_flowtable_ctx {
@@ -277,11 +293,46 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
+static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
+{
+ struct iphdr *iph;
+ u16 size;
+
+ if (!pskb_may_pull(skb, sizeof(*iph) + *psize))
+ return false;
+
+ iph = (struct iphdr *)(skb_network_header(skb) + *psize);
+ size = iph->ihl << 2;
+
+ if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
+ return false;
+
+ if (iph->ttl <= 1)
+ return false;
+
+ if (iph->protocol == IPPROTO_IPIP)
+ *psize += size;
+
+ return true;
+}
+
+static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
+{
+ struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
+
+ if (iph->protocol != IPPROTO_IPIP)
+ return;
+
+ skb_pull(skb, iph->ihl << 2);
+ skb_reset_network_header(skb);
+}
+
static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
u32 *offset)
{
+ __be16 inner_proto = skb->protocol;
struct vlan_ethhdr *veth;
- __be16 inner_proto;
+ bool ret = false;
switch (skb->protocol) {
case htons(ETH_P_8021Q):
@@ -291,19 +342,23 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
if (veth->h_vlan_encapsulated_proto == proto) {
*offset += VLAN_HLEN;
- return true;
+ inner_proto = proto;
+ ret = true;
}
break;
case htons(ETH_P_PPP_SES):
if (nf_flow_pppoe_proto(skb, &inner_proto) &&
inner_proto == proto) {
*offset += PPPOE_SES_HLEN;
- return true;
+ ret = true;
}
break;
}
- return false;
+ if (inner_proto == htons(ETH_P_IP))
+ ret = nf_flow_ip4_tunnel_proto(skb, offset);
+
+ return ret;
}
static void nf_flow_encap_pop(struct sk_buff *skb,
@@ -331,6 +386,9 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
break;
}
}
+
+ if (skb->protocol == htons(ETH_P_IP))
+ nf_flow_ip4_tunnel_pop(skb);
}
struct nf_flow_xmit {
@@ -356,8 +414,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
{
struct flow_offload_tuple tuple = {};
- if (skb->protocol != htons(ETH_P_IP) &&
- !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
+ if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
return NULL;
if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c
index eb9b33a1873a..73717cc32df5 100644
--- a/net/netfilter/nf_flow_table_path.c
+++ b/net/netfilter/nf_flow_table_path.c
@@ -80,6 +80,8 @@ struct nft_forward_info {
__be16 proto;
} encap[NF_FLOW_TABLE_ENCAP_MAX];
u8 num_encaps;
+ struct flow_offload_tunnel tun;
+ u8 num_tuns;
u8 ingress_vlans;
u8 h_source[ETH_ALEN];
u8 h_dest[ETH_ALEN];
@@ -102,6 +104,7 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
case DEV_PATH_DSA:
case DEV_PATH_VLAN:
case DEV_PATH_PPPOE:
+ case DEV_PATH_TUN:
info->indev = path->dev;
if (is_zero_ether_addr(info->h_source))
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
@@ -113,14 +116,27 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
break;
}
- /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
- if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
- info->indev = NULL;
- break;
+ /* DEV_PATH_VLAN, DEV_PATH_PPPOE and DEV_PATH_TUN */
+ if (path->type == DEV_PATH_TUN) {
+ if (info->num_tuns) {
+ info->indev = NULL;
+ break;
+ }
+ info->tun.src_v6 = path->tun.src_v6;
+ info->tun.dst_v6 = path->tun.dst_v6;
+ info->tun.l3_proto = path->tun.l3_proto;
+ info->num_tuns++;
+ } else {
+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
+ info->indev = NULL;
+ break;
+ }
+ info->encap[info->num_encaps].id =
+ path->encap.id;
+ info->encap[info->num_encaps].proto =
+ path->encap.proto;
+ info->num_encaps++;
}
- info->encap[info->num_encaps].id = path->encap.id;
- info->encap[info->num_encaps].proto = path->encap.proto;
- info->num_encaps++;
if (path->type == DEV_PATH_PPPOE)
memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
break;
@@ -203,6 +219,14 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
route->tuple[!dir].in.encap[i].id = info.encap[i].id;
route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
}
+
+ if (info.num_tuns) {
+ route->tuple[!dir].in.tun.src_v6 = info.tun.dst_v6;
+ route->tuple[!dir].in.tun.dst_v6 = info.tun.src_v6;
+ route->tuple[!dir].in.tun.l3_proto = info.tun.l3_proto;
+ route->tuple[!dir].in.num_tuns = info.num_tuns;
+ }
+
route->tuple[!dir].in.num_encaps = info.num_encaps;
route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
route->tuple[dir].out.ifindex = info.outdev->ifindex;