summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorJohn Fastabend2017-08-16 07:32:47 +0200
committerDavid S. Miller2017-08-16 20:27:53 +0200
commit174a79ff9515f400b9a6115643dafd62a635b7e6 (patch)
treef48f1fc407adb9bce6fb0e5cddaabd7141acd071 /include
parentbpf: export bpf_prog_inc_not_zero (diff)
downloadkernel-qcow2-linux-174a79ff9515f400b9a6115643dafd62a635b7e6.tar.gz
kernel-qcow2-linux-174a79ff9515f400b9a6115643dafd62a635b7e6.tar.xz
kernel-qcow2-linux-174a79ff9515f400b9a6115643dafd62a635b7e6.zip
bpf: sockmap with sk redirect support
Recently we added a new map type called dev map used to forward XDP packets between ports (6093ec2dc313). This patches introduces a similar notion for sockets. A sockmap allows users to add participating sockets to a map. When sockets are added to the map enough context is stored with the map entry to use the entry with a new helper bpf_sk_redirect_map(map, key, flags) This helper (analogous to bpf_redirect_map in XDP) is given the map and an entry in the map. When called from a sockmap program, discussed below, the skb will be sent on the socket using skb_send_sock(). With the above we need a bpf program to call the helper from that will then implement the send logic. The initial site implemented in this series is the recv_sock hook. For this to work we implemented a map attach command to add attributes to a map. In sockmap we add two programs a parse program and a verdict program. The parse program uses strparser to build messages and pass them to the verdict program. The parse programs use the normal strparser semantics. The verdict program is of type SK_SKB. The verdict program returns a verdict SK_DROP, or SK_REDIRECT for now. Additional actions may be added later. When SK_REDIRECT is returned, expected when bpf program uses bpf_sk_redirect_map(), the sockmap logic will consult per cpu variables set by the helper routine and pull the sock entry out of the sock map. This pattern follows the existing redirect logic in cls and xdp programs. This gives the flow, recv_sock -> str_parser (parse_prog) -> verdict_prog -> skb_send_sock \ -> kfree_skb As an example use case a message based load balancer may use specific logic in the verdict program to select the sock to send on. Sample programs are provided in future patches that hopefully illustrate the user interfaces. Also selftests are in follow-on patches. Signed-off-by: John Fastabend <john.fastabend@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/bpf.h7
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/filter.h2
-rw-r--r--include/uapi/linux/bpf.h33
4 files changed, 40 insertions, 3 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d6e1de8ce0fc..a4145e9c74b5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -16,6 +16,7 @@
#include <linux/rbtree_latch.h>
struct perf_event;
+struct bpf_prog;
struct bpf_map;
/* map is generic key/value storage optionally accesible by eBPF programs */
@@ -37,6 +38,8 @@ struct bpf_map_ops {
void (*map_fd_put_ptr)(void *ptr);
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
+ int (*map_attach)(struct bpf_map *map,
+ struct bpf_prog *p1, struct bpf_prog *p2);
};
struct bpf_map {
@@ -138,8 +141,6 @@ enum bpf_reg_type {
PTR_TO_PACKET_END, /* skb->data + headlen */
};
-struct bpf_prog;
-
/* The information passed from prog-specific *_is_valid_access
* back to the verifier.
*/
@@ -312,6 +313,7 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
/* Map specifics */
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
+struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);
@@ -391,6 +393,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
+extern const struct bpf_func_proto bpf_sock_map_update_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 4b72db30dacf..fa805074d168 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -38,4 +38,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
#ifdef CONFIG_NET
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
#endif
diff --git a/include/linux/filter.h b/include/linux/filter.h
index d19ed3c15e1e..7015116331af 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -727,6 +727,8 @@ void xdp_do_flush_map(void);
void bpf_warn_invalid_xdp_action(u32 act);
void bpf_warn_invalid_xdp_redirect(u32 ifindex);
+struct sock *do_sk_redirect_map(void);
+
#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
extern int bpf_jit_harden;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2e796e384aeb..7f774769e3f5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -110,6 +110,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_ARRAY_OF_MAPS,
BPF_MAP_TYPE_HASH_OF_MAPS,
BPF_MAP_TYPE_DEVMAP,
+ BPF_MAP_TYPE_SOCKMAP,
};
enum bpf_prog_type {
@@ -135,11 +136,15 @@ enum bpf_attach_type {
BPF_CGROUP_INET_EGRESS,
BPF_CGROUP_INET_SOCK_CREATE,
BPF_CGROUP_SOCK_OPS,
+ BPF_CGROUP_SMAP_INGRESS,
__MAX_BPF_ATTACH_TYPE
};
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */
+#define BPF_SOCKMAP_STRPARSER (1U << 0)
+
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
* to the given target_fd cgroup the descendent cgroup will be able to
* override effective bpf program that was inherited from this cgroup
@@ -211,6 +216,7 @@ union bpf_attr {
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
+ __u32 attach_bpf_fd2;
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
@@ -557,6 +563,23 @@ union bpf_attr {
* @mode: operation mode (enum bpf_adj_room_mode)
* @flags: reserved for future use
* Return: 0 on success or negative error code
+ *
+ * int bpf_sk_redirect_map(map, key, flags)
+ * Redirect skb to a sock in map using key as a lookup key for the
+ * sock in map.
+ * @map: pointer to sockmap
+ * @key: key to lookup sock in map
+ * @flags: reserved for future use
+ * Return: SK_REDIRECT
+ *
+ * int bpf_sock_map_update(skops, map, key, flags, map_flags)
+ * @skops: pointer to bpf_sock_ops
+ * @map: pointer to sockmap to update
+ * @key: key to insert/update sock in map
+ * @flags: same flags as map update elem
+ * @map_flags: sock map specific flags
+ * bit 1: Enable strparser
+ * other bits: reserved
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -610,7 +633,9 @@ union bpf_attr {
FN(set_hash), \
FN(setsockopt), \
FN(skb_adjust_room), \
- FN(redirect_map),
+ FN(redirect_map), \
+ FN(sk_redirect_map), \
+ FN(sock_map_update), \
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -747,6 +772,12 @@ struct xdp_md {
__u32 data_end;
};
+enum sk_action {
+ SK_ABORTED = 0,
+ SK_DROP,
+ SK_REDIRECT,
+};
+
#define BPF_TAG_SIZE 8
struct bpf_prog_info {