summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWillem de Bruijn2019-03-22 19:32:56 +0100
committerAlexei Starovoitov2019-03-22 21:52:45 +0100
commit868d523535c2d00b696753ece606e641a816e91e (patch)
tree15da75a49cc199d51682617dc9974239c7933c76
parentbpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_FIXED_GSO (diff)
downloadkernel-qcow2-linux-868d523535c2d00b696753ece606e641a816e91e.tar.gz
kernel-qcow2-linux-868d523535c2d00b696753ece606e641a816e91e.tar.xz
kernel-qcow2-linux-868d523535c2d00b696753ece606e641a816e91e.zip
bpf: add bpf_skb_adjust_room encap flags
When pushing tunnel headers, annotate skbs in the same way as tunnel devices. For GSO packets, the network stack requires certain fields set to segment packets with tunnel headers. gro_gse_segment depends on transport and inner mac header, for instance. Add an option to pass this information. Remove the restriction on len_diff to network header length, which is too short, e.g., for GRE protocols. Changes v1->v2: - document new flags - BPF_F_ADJ_ROOM_MASK moved v2->v3: - BPF_F_ADJ_ROOM_ENCAP_L3_MASK moved Signed-off-by: Willem de Bruijn <willemb@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--include/uapi/linux/bpf.h16
-rw-r--r--net/core/filter.c66
2 files changed, 76 insertions, 6 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4f157d0ec571..837024512baf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1486,11 +1486,20 @@ union bpf_attr {
* * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
* (room space is added or removed below the layer 3 header).
*
- * There is one supported flag at this time:
+ * The following flags are supported at this time:
*
* * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
* Adjusting mss in this way is not allowed for datagrams.
*
+ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **:
+ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **:
+ * Any new space is reserved to hold a tunnel header.
+ * Configure skb offsets and other fields accordingly.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **:
+ * * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
+ * Use with ENCAP_L3 flags to further specify the tunnel type.
+ *
* A call to this helper is susceptible to change the underlaying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -2632,6 +2641,11 @@ enum bpf_func_id {
/* BPF_FUNC_skb_adjust_room flags. */
#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
+
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
diff --git a/net/core/filter.c b/net/core/filter.c
index d3240a0a0eeb..c1d19b074d6c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2963,11 +2963,20 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
}
}
-#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
+ BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+
+#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
+ BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
+ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
+ BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
u64 flags)
{
+ bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
+ unsigned int gso_type = SKB_GSO_DODGY;
+ u16 mac_len, inner_net, inner_trans;
int ret;
if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
@@ -2981,10 +2990,60 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
if (unlikely(ret < 0))
return ret;
+ if (encap) {
+ if (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6))
+ return -ENOTSUPP;
+
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
+ flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+ return -EINVAL;
+
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
+ flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+ return -EINVAL;
+
+ if (skb->encapsulation)
+ return -EALREADY;
+
+ mac_len = skb->network_header - skb->mac_header;
+ inner_net = skb->network_header;
+ inner_trans = skb->transport_header;
+ }
+
ret = bpf_skb_net_hdr_push(skb, off, len_diff);
if (unlikely(ret < 0))
return ret;
+ if (encap) {
+ /* inner mac == inner_net on l3 encap */
+ skb->inner_mac_header = inner_net;
+ skb->inner_network_header = inner_net;
+ skb->inner_transport_header = inner_trans;
+ skb_set_inner_protocol(skb, skb->protocol);
+
+ skb->encapsulation = 1;
+ skb_set_network_header(skb, mac_len);
+
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+ else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
+ gso_type |= SKB_GSO_GRE;
+ else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+ gso_type |= SKB_GSO_IPXIP6;
+ else
+ gso_type |= SKB_GSO_IPXIP4;
+
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
+ flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
+ int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
+ sizeof(struct ipv6hdr) :
+ sizeof(struct iphdr);
+
+ skb_set_transport_header(skb, mac_len + nh_len);
+ }
+ }
+
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -2993,7 +3052,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
skb_decrease_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
- shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_type |= gso_type;
shinfo->gso_segs = 0;
}
@@ -3044,7 +3103,6 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb)
BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
u32, mode, u64, flags)
{
- bool trans_same = skb->transport_header == skb->network_header;
u32 len_cur, len_diff_abs = abs(len_diff);
u32 len_min = bpf_skb_net_base_len(skb);
u32 len_max = __bpf_skb_max_len(skb);
@@ -3073,8 +3131,6 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
}
len_cur = skb->len - skb_network_offset(skb);
- if (skb_transport_header_was_set(skb) && !trans_same)
- len_cur = skb_network_header_len(skb);
if ((shrink && (len_diff_abs >= len_cur ||
len_cur - len_diff_abs < len_min)) ||
(!shrink && (skb->len + len_diff_abs > len_max &&