summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller2018-08-07 21:22:15 +0200
committerDavid S. Miller2018-08-07 21:22:15 +0200
commitd5082b27edf4cc653529c38ac531cf7a7d261a6f (patch)
tree024cd46c881727a9d71df68cdc0d76524b844059
parentMerge branch 'WoL-filters' (diff)
parentnfp: flower: add geneve option match offload (diff)
downloadkernel-qcow2-linux-d5082b27edf4cc653529c38ac531cf7a7d261a6f.tar.gz
kernel-qcow2-linux-d5082b27edf4cc653529c38ac531cf7a7d261a6f.tar.xz
kernel-qcow2-linux-d5082b27edf4cc653529c38ac531cf7a7d261a6f.zip
Merge branch 'nfp-ttl-tos-geneve'
Simon Horman says: ==================== nfp: flower: tunnel TTL & TOS, and Geneve options set & match support this series contains updates for the TC Flower classifier and the offload facility for it in the NFP driver. * Patches 1 & 2: update the NFP driver to allow offload of matching and setting tunnel ToS/TTL of flows using the TC Flower classifier and tun_key action * Patches 3 & 4: enhance the flow dissector and TC Flower classifier to allow match on Geneve options * Patch 5 & 6: update the NFP driver to allow offload of matching and setting Geneve options of flows using the TC Flower classifier and tun_key action ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c139
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.h33
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.h1
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/match.c34
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c42
-rw-r--r--include/net/flow_dissector.h17
-rw-r--r--include/uapi/linux/pkt_cls.h26
-rw-r--r--net/core/flow_dissector.c19
-rw-r--r--net/sched/cls_flower.c244
9 files changed, 525 insertions, 30 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index e56b815a8dc6..0ba0356ec4e6 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -32,6 +32,7 @@
*/
#include <linux/bitfield.h>
+#include <net/geneve.h>
#include <net/pkt_cls.h>
#include <net/switchdev.h>
#include <net/tc_act/tc_csum.h>
@@ -45,7 +46,15 @@
#include "main.h"
#include "../nfp_net_repr.h"
-#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (TUNNEL_CSUM | TUNNEL_KEY)
+/* The kernel versions of TUNNEL_* are not ABI and therefore vulnerable
+ * to change. Such changes will break our FW ABI.
+ */
+#define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01)
+#define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04)
+#define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800)
+#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \
+ NFP_FL_TUNNEL_KEY | \
+ NFP_FL_TUNNEL_GENEVE_OPT)
static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan)
{
@@ -229,7 +238,71 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len)
}
static int
-nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
+nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len,
+ const struct tc_action *action)
+{
+ struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action);
+ int opt_len, opt_cnt, act_start, tot_push_len;
+ u8 *src = ip_tunnel_info_opts(ip_tun);
+
+ /* We need to populate the options in reverse order for HW.
+ * Therefore we go through the options, calculating the
+ * number of options and the total size, then we populate
+ * them in reverse order in the action list.
+ */
+ opt_cnt = 0;
+ tot_push_len = 0;
+ opt_len = ip_tun->options_len;
+ while (opt_len > 0) {
+ struct geneve_opt *opt = (struct geneve_opt *)src;
+
+ opt_cnt++;
+ if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT)
+ return -EOPNOTSUPP;
+
+ tot_push_len += sizeof(struct nfp_fl_push_geneve) +
+ opt->length * 4;
+ if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT)
+ return -EOPNOTSUPP;
+
+ opt_len -= sizeof(struct geneve_opt) + opt->length * 4;
+ src += sizeof(struct geneve_opt) + opt->length * 4;
+ }
+
+ if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ)
+ return -EOPNOTSUPP;
+
+ act_start = *list_len;
+ *list_len += tot_push_len;
+ src = ip_tunnel_info_opts(ip_tun);
+ while (opt_cnt) {
+ struct geneve_opt *opt = (struct geneve_opt *)src;
+ struct nfp_fl_push_geneve *push;
+ size_t act_size, len;
+
+ opt_cnt--;
+ act_size = sizeof(struct nfp_fl_push_geneve) + opt->length * 4;
+ tot_push_len -= act_size;
+ len = act_start + tot_push_len;
+
+ push = (struct nfp_fl_push_geneve *)&nfp_fl->action_data[len];
+ push->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_GENEVE;
+ push->head.len_lw = act_size >> NFP_FL_LW_SIZ;
+ push->reserved = 0;
+ push->class = opt->opt_class;
+ push->type = opt->type;
+ push->length = opt->length;
+ memcpy(&push->opt_data, opt->opt_data, opt->length * 4);
+
+ src += sizeof(struct geneve_opt) + opt->length * 4;
+ }
+
+ return 0;
+}
+
+static int
+nfp_fl_set_ipv4_udp_tun(struct nfp_app *app,
+ struct nfp_fl_set_ipv4_udp_tun *set_tun,
const struct tc_action *action,
struct nfp_fl_pre_tunnel *pre_tun,
enum nfp_flower_tun_type tun_type,
@@ -237,19 +310,19 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
{
size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun);
struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action);
+ struct nfp_flower_priv *priv = app->priv;
u32 tmp_set_ip_tun_type_index = 0;
- struct flowi4 flow = {};
/* Currently support one pre-tunnel so index is always 0. */
int pretun_idx = 0;
- struct rtable *rt;
- struct net *net;
- int err;
- if (ip_tun->options_len)
+ BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM ||
+ NFP_FL_TUNNEL_KEY != TUNNEL_KEY ||
+ NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT);
+ if (ip_tun->options_len &&
+ (tun_type != NFP_FL_TUNNEL_GENEVE ||
+ !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)))
return -EOPNOTSUPP;
- net = dev_net(netdev);
-
set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL;
set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ;
@@ -261,28 +334,42 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index);
set_tun->tun_id = ip_tun->key.tun_id;
- /* Do a route lookup to determine ttl - if fails then use default.
- * Note that CONFIG_INET is a requirement of CONFIG_NET_SWITCHDEV so
- * must be defined here.
- */
- flow.daddr = ip_tun->key.u.ipv4.dst;
- flow.flowi4_proto = IPPROTO_UDP;
- rt = ip_route_output_key(net, &flow);
- err = PTR_ERR_OR_ZERO(rt);
- if (!err) {
- set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
- ip_rt_put(rt);
+ if (ip_tun->key.ttl) {
+ set_tun->ttl = ip_tun->key.ttl;
} else {
- set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+ struct net *net = dev_net(netdev);
+ struct flowi4 flow = {};
+ struct rtable *rt;
+ int err;
+
+ /* Do a route lookup to determine ttl - if fails then use
+ * default. Note that CONFIG_INET is a requirement of
+ * CONFIG_NET_SWITCHDEV so must be defined here.
+ */
+ flow.daddr = ip_tun->key.u.ipv4.dst;
+ flow.flowi4_proto = IPPROTO_UDP;
+ rt = ip_route_output_key(net, &flow);
+ err = PTR_ERR_OR_ZERO(rt);
+ if (!err) {
+ set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
+ ip_rt_put(rt);
+ } else {
+ set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+ }
}
set_tun->tos = ip_tun->key.tos;
- if (!(ip_tun->key.tun_flags & TUNNEL_KEY) ||
+ if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) ||
ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS)
return -EOPNOTSUPP;
set_tun->tun_flags = ip_tun->key.tun_flags;
+ if (tun_type == NFP_FL_TUNNEL_GENEVE) {
+ set_tun->tun_proto = htons(ETH_P_TEB);
+ set_tun->tun_len = ip_tun->options_len / 4;
+ }
+
/* Complete pre_tunnel action. */
pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst;
@@ -671,9 +758,13 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a,
nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
*a_len += sizeof(struct nfp_fl_pre_tunnel);
+ err = nfp_fl_push_geneve_options(nfp_fl, a_len, a);
+ if (err)
+ return err;
+
set_tun = (void *)&nfp_fl->action_data[*a_len];
- err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type,
- netdev);
+ err = nfp_fl_set_ipv4_udp_tun(app, set_tun, a, pre_tun,
+ *tun_type, netdev);
if (err)
return err;
*a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 15f1eacd76b6..325954b829c8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -37,6 +37,7 @@
#include <linux/bitfield.h>
#include <linux/skbuff.h>
#include <linux/types.h>
+#include <net/geneve.h>
#include "../nfp_app.h"
#include "../nfpcore/nfp_cpp.h"
@@ -51,6 +52,7 @@
#define NFP_FLOWER_LAYER_VXLAN BIT(7)
#define NFP_FLOWER_LAYER2_GENEVE BIT(5)
+#define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6)
#define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13)
#define NFP_FLOWER_MASK_VLAN_CFI BIT(12)
@@ -81,6 +83,11 @@
#define NFP_FL_MAX_A_SIZ 1216
#define NFP_FL_LW_SIZ 2
+/* Maximum allowed geneve options */
+#define NFP_FL_MAX_GENEVE_OPT_ACT 32
+#define NFP_FL_MAX_GENEVE_OPT_CNT 64
+#define NFP_FL_MAX_GENEVE_OPT_KEY 32
+
/* Action opcodes */
#define NFP_FL_ACTION_OPCODE_OUTPUT 0
#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1
@@ -94,6 +101,7 @@
#define NFP_FL_ACTION_OPCODE_SET_TCP 15
#define NFP_FL_ACTION_OPCODE_PRE_LAG 16
#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17
+#define NFP_FL_ACTION_OPCODE_PUSH_GENEVE 26
#define NFP_FL_ACTION_OPCODE_NUM 32
#define NFP_FL_OUT_FLAGS_LAST BIT(15)
@@ -206,7 +214,19 @@ struct nfp_fl_set_ipv4_udp_tun {
__be16 tun_flags;
u8 ttl;
u8 tos;
- __be32 extra[2];
+ __be32 extra;
+ u8 tun_len;
+ u8 res2;
+ __be16 tun_proto;
+};
+
+struct nfp_fl_push_geneve {
+ struct nfp_fl_act_head head;
+ __be16 reserved;
+ __be16 class;
+ u8 type;
+ u8 length;
+ u8 opt_data[];
};
/* Metadata with L2 (1W/4B)
@@ -346,7 +366,7 @@ struct nfp_flower_ipv6 {
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | ipv4_addr_dst |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Reserved |
+ * | Reserved | tos | ttl |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Reserved |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -356,10 +376,17 @@ struct nfp_flower_ipv6 {
struct nfp_flower_ipv4_udp_tun {
__be32 ip_src;
__be32 ip_dst;
- __be32 reserved[2];
+ __be16 reserved1;
+ u8 tos;
+ u8 ttl;
+ __be32 reserved2;
__be32 tun_id;
};
+struct nfp_flower_geneve_options {
+ u8 data[NFP_FL_MAX_GENEVE_OPT_KEY];
+};
+
#define NFP_FL_TUN_VNI_OFFSET 8
/* The base header for a control message packet.
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index ef2114d13387..85f8209bf007 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -69,6 +69,7 @@ struct nfp_app;
/* Extra features bitmap. */
#define NFP_FL_FEATS_GENEVE BIT(0)
#define NFP_FL_NBI_MTU_SETTING BIT(1)
+#define NFP_FL_FEATS_GENEVE_OPT BIT(2)
#define NFP_FL_FEATS_LAG BIT(31)
struct nfp_fl_mask_id {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 84f7a5dbea9d..a0c72f277faa 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -262,6 +262,21 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version);
}
+static int
+nfp_flower_compile_geneve_opt(void *key_buf, struct tc_cls_flower_offload *flow,
+ bool mask_version)
+{
+ struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+ struct flow_dissector_key_enc_opts *opts;
+
+ opts = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_OPTS,
+ target);
+ memcpy(key_buf, opts->data, opts->len);
+
+ return 0;
+}
+
static void
nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,
struct tc_cls_flower_offload *flow,
@@ -270,6 +285,7 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,
struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
struct flow_dissector_key_ipv4_addrs *tun_ips;
struct flow_dissector_key_keyid *vni;
+ struct flow_dissector_key_ip *ip;
memset(frame, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
@@ -293,6 +309,14 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,
frame->ip_src = tun_ips->src;
frame->ip_dst = tun_ips->dst;
}
+
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ ip = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IP,
+ target);
+ frame->tos = ip->tos;
+ frame->ttl = ip->ttl;
+ }
}
int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
@@ -415,6 +439,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
nfp_flow->nfp_tun_ipv4_addr = tun_dst;
nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst);
}
+
+ if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
+ err = nfp_flower_compile_geneve_opt(ext, flow, false);
+ if (err)
+ return err;
+
+ err = nfp_flower_compile_geneve_opt(msk, flow, true);
+ if (err)
+ return err;
+ }
}
return 0;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 6bc8a97f7e03..2edab01c3beb 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -66,6 +66,8 @@
BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \
BIT(FLOW_DISSECTOR_KEY_MPLS) | \
BIT(FLOW_DISSECTOR_KEY_IP))
@@ -74,7 +76,9 @@
BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
- BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
+ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_IP))
#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \
(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
@@ -139,6 +143,21 @@ static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f)
}
static int
+nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts,
+ u32 *key_layer_two, int *key_size)
+{
+ if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY)
+ return -EOPNOTSUPP;
+
+ if (enc_opts->len > 0) {
+ *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP;
+ *key_size += sizeof(struct nfp_flower_geneve_options);
+ }
+
+ return 0;
+}
+
+static int
nfp_flower_calculate_key_layers(struct nfp_app *app,
struct nfp_fl_key_ls *ret_key_ls,
struct tc_cls_flower_offload *flow,
@@ -151,6 +170,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
u32 key_layer_two;
u8 key_layer;
int key_size;
+ int err;
if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR)
return -EOPNOTSUPP;
@@ -176,6 +196,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL;
struct flow_dissector_key_ports *mask_enc_ports = NULL;
+ struct flow_dissector_key_enc_opts *enc_op = NULL;
struct flow_dissector_key_ports *enc_ports = NULL;
struct flow_dissector_key_control *mask_enc_ctl =
skb_flow_dissector_target(flow->dissector,
@@ -212,11 +233,21 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
if (mask_enc_ports->dst != cpu_to_be16(~0))
return -EOPNOTSUPP;
+ if (dissector_uses_key(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+ enc_op = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_OPTS,
+ flow->key);
+ }
+
switch (enc_ports->dst) {
case htons(NFP_FL_VXLAN_PORT):
*tun_type = NFP_FL_TUNNEL_VXLAN;
key_layer |= NFP_FLOWER_LAYER_VXLAN;
key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+ if (enc_op)
+ return -EOPNOTSUPP;
break;
case htons(NFP_FL_GENEVE_PORT):
if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE))
@@ -226,6 +257,15 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
key_size += sizeof(struct nfp_flower_ext_meta);
key_layer_two |= NFP_FLOWER_LAYER2_GENEVE;
key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+ if (!enc_op)
+ break;
+ if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))
+ return -EOPNOTSUPP;
+ err = nfp_flower_calc_opt_layer(enc_op, &key_layer_two,
+ &key_size);
+ if (err)
+ return err;
break;
default:
return -EOPNOTSUPP;
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 2a17f041f7a1..6a4586dcdede 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -57,6 +57,21 @@ struct flow_dissector_key_mpls {
mpls_label:20;
};
+#define FLOW_DIS_TUN_OPTS_MAX 255
+/**
+ * struct flow_dissector_key_enc_opts:
+ * @data: tunnel option data
+ * @len: length of tunnel option data
+ * @dst_opt_type: tunnel option type
+ */
+struct flow_dissector_key_enc_opts {
+ u8 data[FLOW_DIS_TUN_OPTS_MAX]; /* Using IP_TUNNEL_OPTS_MAX is desired
+ * here but seems difficult to #include
+ */
+ u8 len;
+ __be16 dst_opt_type;
+};
+
struct flow_dissector_key_keyid {
__be32 keyid;
};
@@ -208,6 +223,8 @@ enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */
FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */
+ FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */
+
FLOW_DISSECTOR_KEY_MAX,
};
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 48e5b5d49a34..be382fb0592d 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -480,12 +480,38 @@ enum {
TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */
TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */
+ TCA_FLOWER_KEY_ENC_OPTS,
+ TCA_FLOWER_KEY_ENC_OPTS_MASK,
+
__TCA_FLOWER_MAX,
};
#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
enum {
+ TCA_FLOWER_KEY_ENC_OPTS_UNSPEC,
+ TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested
+ * TCA_FLOWER_KEY_ENC_OPT_GENEVE_
+ * attributes
+ */
+ __TCA_FLOWER_KEY_ENC_OPTS_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1)
+
+enum {
+ TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC,
+ TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, /* u16 */
+ TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */
+ TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */
+
+ __TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \
+ (__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1)
+
+enum {
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
};
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 08a5184f4b34..ce9eeeb7c024 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -154,7 +154,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_PORTS) &&
!dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_ENC_IP))
+ FLOW_DISSECTOR_KEY_ENC_IP) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_OPTS))
return;
info = skb_tunnel_info(skb);
@@ -224,6 +226,21 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
ip->tos = key->tos;
ip->ttl = key->ttl;
}
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+ struct flow_dissector_key_enc_opts *enc_opt;
+
+ enc_opt = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_OPTS,
+ target_container);
+
+ if (info->options_len) {
+ enc_opt->len = info->options_len;
+ ip_tunnel_info_opts_get(enc_opt->data, info);
+ enc_opt->dst_opt_type = info->key.tun_flags &
+ TUNNEL_OPTIONS_PRESENT;
+ }
+ }
}
EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index a3b69bb6f4b0..9da244235170 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -24,6 +24,7 @@
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/flow_dissector.h>
+#include <net/geneve.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
@@ -53,6 +54,7 @@ struct fl_flow_key {
struct flow_dissector_key_tcp tcp;
struct flow_dissector_key_ip ip;
struct flow_dissector_key_ip enc_ip;
+ struct flow_dissector_key_enc_opts enc_opts;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -482,6 +484,21 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ENC_IP_TTL] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED },
+ [TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY,
+ .len = 128 },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -603,6 +620,145 @@ static void fl_set_key_ip(struct nlattr **tb, bool encap,
fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));
}
+static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key,
+ int depth, int option_len,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1];
+ struct nlattr *class = NULL, *type = NULL, *data = NULL;
+ struct geneve_opt *opt;
+ int err, data_len = 0;
+
+ if (option_len > sizeof(struct geneve_opt))
+ data_len = option_len - sizeof(struct geneve_opt);
+
+ opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len];
+ memset(opt, 0xff, option_len);
+ opt->length = data_len / 4;
+ opt->r1 = 0;
+ opt->r2 = 0;
+ opt->r3 = 0;
+
+ /* If no mask has been prodived we assume an exact match. */
+ if (!depth)
+ return sizeof(struct geneve_opt) + data_len;
+
+ if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) {
+ NL_SET_ERR_MSG(extack, "Non-geneve option type for mask");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
+ nla, geneve_opt_policy, extack);
+ if (err < 0)
+ return err;
+
+ /* We are not allowed to omit any of CLASS, TYPE or DATA
+ * fields from the key.
+ */
+ if (!option_len &&
+ (!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] ||
+ !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] ||
+ !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
+ return -EINVAL;
+ }
+
+ /* Omitting any of CLASS, TYPE or DATA fields is allowed
+ * for the mask.
+ */
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) {
+ int new_len = key->enc_opts.len;
+
+ data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA];
+ data_len = nla_len(data);
+ if (data_len < 4) {
+ NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
+ return -ERANGE;
+ }
+ if (data_len % 4) {
+ NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
+ return -ERANGE;
+ }
+
+ new_len += sizeof(struct geneve_opt) + data_len;
+ BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX);
+ if (new_len > FLOW_DIS_TUN_OPTS_MAX) {
+ NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
+ return -ERANGE;
+ }
+ opt->length = data_len / 4;
+ memcpy(opt->opt_data, nla_data(data), data_len);
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) {
+ class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS];
+ opt->opt_class = nla_get_be16(class);
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) {
+ type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE];
+ opt->type = nla_get_u8(type);
+ }
+
+ return sizeof(struct geneve_opt) + data_len;
+}
+
+static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
+ struct fl_flow_key *mask,
+ struct netlink_ext_ack *extack)
+{
+ const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
+ int option_len, key_depth, msk_depth = 0;
+
+ nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
+ nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
+ msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
+ }
+
+ nla_for_each_attr(nla_opt_key, nla_enc_key,
+ nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) {
+ switch (nla_type(nla_opt_key)) {
+ case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
+ option_len = 0;
+ key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
+ option_len = fl_set_geneve_opt(nla_opt_key, key,
+ key_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ key->enc_opts.len += option_len;
+ /* At the same time we need to parse through the mask
+ * in order to verify exact and mask attribute lengths.
+ */
+ mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
+ option_len = fl_set_geneve_opt(nla_opt_msk, mask,
+ msk_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ mask->enc_opts.len += option_len;
+ if (key->enc_opts.len != mask->enc_opts.len) {
+ NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
+ return -EINVAL;
+ }
+
+ if (msk_depth)
+ nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int fl_set_key(struct net *net, struct nlattr **tb,
struct fl_flow_key *key, struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@ -799,6 +955,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);
+ if (tb[TCA_FLOWER_KEY_ENC_OPTS]) {
+ ret = fl_set_enc_opt(tb, key, mask, extack);
+ if (ret)
+ return ret;
+ }
+
if (tb[TCA_FLOWER_KEY_FLAGS])
ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
@@ -894,6 +1056,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_IP, enc_ip);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts);
skb_flow_dissector_init(dissector, keys, cnt);
}
@@ -1414,6 +1578,83 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
}
+static int fl_dump_key_geneve_opt(struct sk_buff *skb,
+ struct flow_dissector_key_enc_opts *enc_opts)
+{
+ struct geneve_opt *opt;
+ struct nlattr *nest;
+ int opt_off = 0;
+
+ nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE);
+ if (!nest)
+ goto nla_put_failure;
+
+ while (enc_opts->len > opt_off) {
+ opt = (struct geneve_opt *)&enc_opts->data[opt_off];
+
+ if (nla_put_be16(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,
+ opt->opt_class))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,
+ opt->type))
+ goto nla_put_failure;
+ if (nla_put(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,
+ opt->length * 4, opt->opt_data))
+ goto nla_put_failure;
+
+ opt_off += sizeof(struct geneve_opt) + opt->length * 4;
+ }
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
+ struct flow_dissector_key_enc_opts *enc_opts)
+{
+ struct nlattr *nest;
+ int err;
+
+ if (!enc_opts->len)
+ return 0;
+
+ nest = nla_nest_start(skb, enc_opt_type);
+ if (!nest)
+ goto nla_put_failure;
+
+ switch (enc_opts->dst_opt_type) {
+ case TUNNEL_GENEVE_OPT:
+ err = fl_dump_key_geneve_opt(skb, enc_opts);
+ if (err)
+ goto nla_put_failure;
+ break;
+ default:
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int fl_dump_key_enc_opt(struct sk_buff *skb,
+ struct flow_dissector_key_enc_opts *key_opts,
+ struct flow_dissector_key_enc_opts *msk_opts)
+{
+ int err;
+
+ err = fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS, key_opts);
+ if (err)
+ return err;
+
+ return fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS_MASK, msk_opts);
+}
+
static int fl_dump_key(struct sk_buff *skb, struct net *net,
struct fl_flow_key *key, struct fl_flow_key *mask)
{
@@ -1594,7 +1835,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
&mask->enc_tp.dst,
TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
sizeof(key->enc_tp.dst)) ||
- fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip))
+ fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip) ||
+ fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts))
goto nla_put_failure;
if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))