summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller2016-07-01 22:32:27 +0200
committerDavid S. Miller2016-07-01 22:32:27 +0200
commitdc9a20020a73e81766f1a6341bcafb7f00b5f92a (patch)
treece74217c60eeda9e99a8b5e2babcf0847ab3b24f
parentMerge branch 'bpf-robustify' (diff)
parentcgroup: bpf: Add an example to do cgroup checking in BPF (diff)
downloadkernel-qcow2-linux-dc9a20020a73e81766f1a6341bcafb7f00b5f92a.tar.gz
kernel-qcow2-linux-dc9a20020a73e81766f1a6341bcafb7f00b5f92a.tar.xz
kernel-qcow2-linux-dc9a20020a73e81766f1a6341bcafb7f00b5f92a.zip
Merge branch 'bpf-cgroup2'
Martin KaFai Lau says: ==================== cgroup: bpf: cgroup2 membership test on skb This series is to implement a bpf-way to check the cgroup2 membership of a skb (sk_buff). It is similar to the feature added in netfilter: c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match") The current target is the tc-like usage. v3: - Remove WARN_ON_ONCE(!rcu_read_lock_held()) - Stop BPF_MAP_TYPE_CGROUP_ARRAY usage in patch 2/4 - Avoid mounting bpf fs manually in patch 4/4 - Thanks for Daniel's review and the above suggestions - Check CONFIG_SOCK_CGROUP_DATA instead of CONFIG_CGROUPS. Thanks to the kbuild bot's report. Patch 2/4 only needs CONFIG_CGROUPS while patch 3/4 needs CONFIG_SOCK_CGROUP_DATA. Since a single bpf cgrp2 array alone is not useful for now, CONFIG_SOCK_CGROUP_DATA is also used in patch 2/4. We can fine tune it later if we find other use cases for the cgrp2 array. - Return EAGAIN instead of ENOENT if the cgrp2 array entry is NULL. It is to distinguish these two cases: 1) the userland has not populated this array entry yet. or 2) not finding cgrp2 from the skb. - Be-lated thanks to Alexei and Tejun on reviewing v1 and giving advice on this work. v2: - Fix two return cases in cgroup_get_from_fd() - Fix compilation errors when CONFIG_CGROUPS is not used: - arraymap.c: avoid registering BPF_MAP_TYPE_CGROUP_ARRAY - filter.c: tc_cls_act_func_proto() returns NULL on BPF_FUNC_skb_in_cgroup - Add comments to BPF_FUNC_skb_in_cgroup and cgroup_get_from_fd() ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--include/uapi/linux/bpf.h12
-rw-r--r--kernel/bpf/arraymap.c43
-rw-r--r--kernel/bpf/syscall.c3
-rw-r--r--kernel/bpf/verifier.c8
-rw-r--r--kernel/cgroup.c35
-rw-r--r--net/core/filter.c38
-rw-r--r--samples/bpf/Makefile3
-rw-r--r--samples/bpf/bpf_helpers.h2
-rw-r--r--samples/bpf/test_cgrp2_array_pin.c109
-rwxr-xr-xsamples/bpf/test_cgrp2_tc.sh184
-rw-r--r--samples/bpf/test_cgrp2_tc_kern.c69
12 files changed, 506 insertions, 1 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a20320c666fd..984f73b719a9 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -87,6 +87,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
struct cgroup_subsys *ss);
struct cgroup *cgroup_get_from_path(const char *path);
+struct cgroup *cgroup_get_from_fd(int fd);
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index be6ac1291680..f44504d875e2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -84,6 +84,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_STACK_TRACE,
+ BPF_MAP_TYPE_CGROUP_ARRAY,
};
enum bpf_prog_type {
@@ -336,6 +337,17 @@ enum bpf_func_id {
*/
BPF_FUNC_skb_change_type,
+ /**
+ * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+ * @skb: pointer to skb
+ * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ * @index: index of the cgroup in the bpf_map
+ * Return:
+ * == 0 skb failed the cgroup2 descendant test
+ * == 1 skb succeeded the cgroup2 descendant test
+ * < 0 error
+ */
+ BPF_FUNC_skb_in_cgroup,
__BPF_FUNC_MAX_ID,
};
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 4ec57a649b1f..db1a743e3db2 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -537,3 +537,46 @@ static int __init register_perf_event_array_map(void)
return 0;
}
late_initcall(register_perf_event_array_map);
+
+#ifdef CONFIG_SOCK_CGROUP_DATA
+static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
+ struct file *map_file /* not used */,
+ int fd)
+{
+ return cgroup_get_from_fd(fd);
+}
+
+static void cgroup_fd_array_put_ptr(void *ptr)
+{
+ /* cgroup_put free cgrp after a rcu grace period */
+ cgroup_put(ptr);
+}
+
+static void cgroup_fd_array_free(struct bpf_map *map)
+{
+ bpf_fd_array_map_clear(map);
+ fd_array_map_free(map);
+}
+
+static const struct bpf_map_ops cgroup_array_ops = {
+ .map_alloc = fd_array_map_alloc,
+ .map_free = cgroup_fd_array_free,
+ .map_get_next_key = array_map_get_next_key,
+ .map_lookup_elem = fd_array_map_lookup_elem,
+ .map_delete_elem = fd_array_map_delete_elem,
+ .map_fd_get_ptr = cgroup_fd_array_get_ptr,
+ .map_fd_put_ptr = cgroup_fd_array_put_ptr,
+};
+
+static struct bpf_map_type_list cgroup_array_type __read_mostly = {
+ .ops = &cgroup_array_ops,
+ .type = BPF_MAP_TYPE_CGROUP_ARRAY,
+};
+
+static int __init register_cgroup_array_map(void)
+{
+ bpf_register_map_type(&cgroup_array_type);
+ return 0;
+}
+late_initcall(register_cgroup_array_map);
+#endif
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 22863d9872b1..96d938a22050 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -393,7 +393,8 @@ static int map_update_elem(union bpf_attr *attr)
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_update(map, key, value, attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
- map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
+ map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
+ map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
rcu_read_lock();
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
attr->flags);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index eec9f90ba030..e206c2181412 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1035,6 +1035,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (func_id != BPF_FUNC_get_stackid)
goto error;
break;
+ case BPF_MAP_TYPE_CGROUP_ARRAY:
+ if (func_id != BPF_FUNC_skb_in_cgroup)
+ goto error;
+ break;
default:
break;
}
@@ -1054,6 +1058,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
goto error;
break;
+ case BPF_FUNC_skb_in_cgroup:
+ if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
+ goto error;
+ break;
default:
break;
}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 75c0ff00aca6..50787cd61da2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -62,6 +62,7 @@
#include <linux/proc_ns.h>
#include <linux/nsproxy.h>
#include <linux/proc_ns.h>
+#include <linux/file.h>
#include <net/sock.h>
/*
@@ -6209,6 +6210,40 @@ struct cgroup *cgroup_get_from_path(const char *path)
}
EXPORT_SYMBOL_GPL(cgroup_get_from_path);
+/**
+ * cgroup_get_from_fd - get a cgroup pointer from a fd
+ * @fd: fd obtained by open(cgroup2_dir)
+ *
+ * Find the cgroup from a fd which should be obtained
+ * by opening a cgroup directory. Returns a pointer to the
+ * cgroup on success. ERR_PTR is returned if the cgroup
+ * cannot be found.
+ */
+struct cgroup *cgroup_get_from_fd(int fd)
+{
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+ struct file *f;
+
+ f = fget_raw(fd);
+ if (!f)
+ return ERR_PTR(-EBADF);
+
+ css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
+ fput(f);
+ if (IS_ERR(css))
+ return ERR_CAST(css);
+
+ cgrp = css->cgroup;
+ if (!cgroup_on_dfl(cgrp)) {
+ cgroup_put(cgrp);
+ return ERR_PTR(-EBADF);
+ }
+
+ return cgrp;
+}
+EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
+
/*
* sock->sk_cgrp_data handling. For more info, see sock_cgroup_data
* definition in cgroup-defs.h.
diff --git a/net/core/filter.c b/net/core/filter.c
index 76fee35da244..54071cf70fb5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2239,6 +2239,40 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
}
}
+#ifdef CONFIG_SOCK_CGROUP_DATA
+static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *)(long)r1;
+ struct bpf_map *map = (struct bpf_map *)(long)r2;
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct cgroup *cgrp;
+ struct sock *sk;
+ u32 i = (u32)r3;
+
+ sk = skb->sk;
+ if (!sk || !sk_fullsock(sk))
+ return -ENOENT;
+
+ if (unlikely(i >= array->map.max_entries))
+ return -E2BIG;
+
+ cgrp = READ_ONCE(array->ptrs[i]);
+ if (unlikely(!cgrp))
+ return -EAGAIN;
+
+ return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
+}
+
+static const struct bpf_func_proto bpf_skb_in_cgroup_proto = {
+ .func = bpf_skb_in_cgroup,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+};
+#endif
+
static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
@@ -2307,6 +2341,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return bpf_get_event_output_proto();
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ case BPF_FUNC_skb_in_cgroup:
+ return &bpf_skb_in_cgroup_proto;
+#endif
default:
return sk_filter_func_proto(func_id);
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 0bf2478cb7df..a98b780e974c 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -20,6 +20,7 @@ hostprogs-y += offwaketime
hostprogs-y += spintest
hostprogs-y += map_perf_test
hostprogs-y += test_overhead
+hostprogs-y += test_cgrp2_array_pin
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
@@ -40,6 +41,7 @@ offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o
spintest-objs := bpf_load.o libbpf.o spintest_user.o
map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
+test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -61,6 +63,7 @@ always += map_perf_test_kern.o
always += test_overhead_tp_kern.o
always += test_overhead_kprobe_kern.o
always += parse_varlen.o parse_simple.o parse_ldabs.o
+always += test_cgrp2_tc_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 7904a2a493de..84e3fd919a06 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -70,6 +70,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
(void *) BPF_FUNC_l3_csum_replace;
static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
(void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_skb_in_cgroup;
#if defined(__x86_64__)
diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c
new file mode 100644
index 000000000000..70e86f7be69d
--- /dev/null
+++ b/samples/bpf/test_cgrp2_array_pin.c
@@ -0,0 +1,109 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include "libbpf.h"
+
+static void usage(void)
+{
+ printf("Usage: test_cgrp2_array_pin [...]\n");
+ printf(" -F <file> File to pin an BPF cgroup array\n");
+ printf(" -U <file> Update an already pinned BPF cgroup array\n");
+ printf(" -v <value> Full path of the cgroup2\n");
+ printf(" -h Display this help\n");
+}
+
+int main(int argc, char **argv)
+{
+ const char *pinned_file = NULL, *cg2 = NULL;
+ int create_array = 1;
+ int array_key = 0;
+ int array_fd = -1;
+ int cg2_fd = -1;
+ int ret = -1;
+ int opt;
+
+ while ((opt = getopt(argc, argv, "F:U:v:")) != -1) {
+ switch (opt) {
+ /* General args */
+ case 'F':
+ pinned_file = optarg;
+ break;
+ case 'U':
+ pinned_file = optarg;
+ create_array = 0;
+ break;
+ case 'v':
+ cg2 = optarg;
+ break;
+ default:
+ usage();
+ goto out;
+ }
+ }
+
+ if (!cg2 || !pinned_file) {
+ usage();
+ goto out;
+ }
+
+ cg2_fd = open(cg2, O_RDONLY);
+ if (cg2_fd < 0) {
+ fprintf(stderr, "open(%s,...): %s(%d)\n",
+ cg2, strerror(errno), errno);
+ goto out;
+ }
+
+ if (create_array) {
+ array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,
+ sizeof(uint32_t), sizeof(uint32_t),
+ 1, 0);
+ if (array_fd < 0) {
+ fprintf(stderr,
+ "bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n",
+ strerror(errno), errno);
+ goto out;
+ }
+ } else {
+ array_fd = bpf_obj_get(pinned_file);
+ if (array_fd < 0) {
+ fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
+ pinned_file, strerror(errno), errno);
+ goto out;
+ }
+ }
+
+ ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0);
+ if (ret) {
+ perror("bpf_update_elem");
+ goto out;
+ }
+
+ if (create_array) {
+ ret = bpf_obj_pin(array_fd, pinned_file);
+ if (ret) {
+ fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n",
+ pinned_file, strerror(errno), errno);
+ goto out;
+ }
+ }
+
+out:
+ if (array_fd != -1)
+ close(array_fd);
+ if (cg2_fd != -1)
+ close(cg2_fd);
+ return ret;
+}
diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh
new file mode 100755
index 000000000000..0b119eeaf85c
--- /dev/null
+++ b/samples/bpf/test_cgrp2_tc.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+
+MY_DIR=$(dirname $0)
+# Details on the bpf prog
+BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin'
+BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o"
+BPF_SECTION='filter'
+
+[ -z "$TC" ] && TC='tc'
+[ -z "$IP" ] && IP='ip'
+
+# Names of the veth interface, net namespace...etc.
+HOST_IFC='ve'
+NS_IFC='vens'
+NS='ns'
+
+find_mnt() {
+ cat /proc/mounts | \
+ awk '{ if ($3 == "'$1'" && mnt == "") { mnt = $2 }} END { print mnt }'
+}
+
+# Init cgroup2 vars
+init_cgrp2_vars() {
+ CGRP2_ROOT=$(find_mnt cgroup2)
+ if [ -z "$CGRP2_ROOT" ]
+ then
+ CGRP2_ROOT='/mnt/cgroup2'
+ MOUNT_CGRP2="yes"
+ fi
+ CGRP2_TC="$CGRP2_ROOT/tc"
+ CGRP2_TC_LEAF="$CGRP2_TC/leaf"
+}
+
+# Init bpf fs vars
+init_bpf_fs_vars() {
+ local bpf_fs_root=$(find_mnt bpf)
+ [ -n "$bpf_fs_root" ] || return -1
+ BPF_FS_TC_SHARE="$bpf_fs_root/tc/globals"
+}
+
+setup_cgrp2() {
+ case $1 in
+ start)
+ if [ "$MOUNT_CGRP2" == 'yes' ]
+ then
+ [ -d $CGRP2_ROOT ] || mkdir -p $CGRP2_ROOT
+ mount -t cgroup2 none $CGRP2_ROOT || return $?
+ fi
+ mkdir -p $CGRP2_TC_LEAF
+ ;;
+ *)
+ rmdir $CGRP2_TC_LEAF && rmdir $CGRP2_TC
+ [ "$MOUNT_CGRP2" == 'yes' ] && umount $CGRP2_ROOT
+ ;;
+ esac
+}
+
+setup_bpf_cgrp2_array() {
+ local bpf_cgrp2_array="$BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME"
+ case $1 in
+ start)
+ $MY_DIR/test_cgrp2_array_pin -U $bpf_cgrp2_array -v $CGRP2_TC
+ ;;
+ *)
+ [ -d "$BPF_FS_TC_SHARE" ] && rm -f $bpf_cgrp2_array
+ ;;
+ esac
+}
+
+setup_net() {
+ case $1 in
+ start)
+ $IP link add $HOST_IFC type veth peer name $NS_IFC || return $?
+ $IP link set dev $HOST_IFC up || return $?
+ sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0
+
+ $IP netns add ns || return $?
+ $IP link set dev $NS_IFC netns ns || return $?
+ $IP -n $NS link set dev $NS_IFC up || return $?
+ $IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0
+ $TC qdisc add dev $HOST_IFC clsact || return $?
+ $TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $?
+ ;;
+ *)
+ $IP netns del $NS
+ $IP link del $HOST_IFC
+ ;;
+ esac
+}
+
+run_in_cgrp() {
+ # Fork another bash and move it under the specified cgroup.
+ # It makes the cgroup cleanup easier at the end of the test.
+ cmd='echo $$ > '
+ cmd="$cmd $1/cgroup.procs; exec $2"
+ bash -c "$cmd"
+}
+
+do_test() {
+ run_in_cgrp $CGRP2_TC_LEAF "ping -6 -c3 ff02::1%$HOST_IFC >& /dev/null"
+ local dropped=$($TC -s qdisc show dev $HOST_IFC | tail -3 | \
+ awk '/drop/{print substr($7, 0, index($7, ",")-1)}')
+ if [[ $dropped -eq 0 ]]
+ then
+ echo "FAIL"
+ return 1
+ else
+ echo "Successfully filtered $dropped packets"
+ return 0
+ fi
+}
+
+do_exit() {
+ if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ]
+ then
+ echo "------ DEBUG ------"
+ echo "mount: "; mount | egrep '(cgroup2|bpf)'; echo
+ echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo
+ if [ -d "$BPF_FS_TC_SHARE" ]
+ then
+ echo "$BPF_FS_TC_SHARE: "; ls -l $BPF_FS_TC_SHARE; echo
+ fi
+ echo "Host net:"
+ $IP netns
+ $IP link show dev $HOST_IFC
+ $IP -6 a show dev $HOST_IFC
+ $TC -s qdisc show dev $HOST_IFC
+ echo
+ echo "$NS net:"
+ $IP -n $NS link show dev $NS_IFC
+ $IP -n $NS -6 link show dev $NS_IFC
+ echo "------ DEBUG ------"
+ echo
+ fi
+
+ if [ "$MODE" != 'nocleanup' ]
+ then
+ setup_net stop
+ setup_bpf_cgrp2_array stop
+ setup_cgrp2 stop
+ fi
+}
+
+init_cgrp2_vars
+init_bpf_fs_vars
+
+while [[ $# -ge 1 ]]
+do
+ a="$1"
+ case $a in
+ debug)
+ DEBUG='yes'
+ shift 1
+ ;;
+ cleanup-only)
+ MODE='cleanuponly'
+ shift 1
+ ;;
+ no-cleanup)
+ MODE='nocleanup'
+ shift 1
+ ;;
+ *)
+ echo "test_cgrp2_tc [debug] [cleanup-only | no-cleanup]"
+ echo " debug: Print cgrp and network setup details at the end of the test"
+ echo " cleanup-only: Try to cleanup things from last test. No test will be run"
+ echo " no-cleanup: Run the test but don't do cleanup at the end"
+ echo "[Note: If no arg is given, it will run the test and do cleanup at the end]"
+ echo
+ exit -1
+ ;;
+ esac
+done
+
+trap do_exit 0
+
+[ "$MODE" == 'cleanuponly' ] && exit
+
+setup_cgrp2 start || exit $?
+setup_net start || exit $?
+init_bpf_fs_vars || exit $?
+setup_bpf_cgrp2_array start || exit $?
+do_test
+echo
diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c
new file mode 100644
index 000000000000..2732c37c8d5b
--- /dev/null
+++ b/samples/bpf/test_cgrp2_tc_kern.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/in6.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/pkt_cls.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* copy of 'struct ethhdr' without __packed */
+struct eth_hdr {
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
+ unsigned short h_proto;
+};
+
+#define PIN_GLOBAL_NS 2
+struct bpf_elf_map {
+ __u32 type;
+ __u32 size_key;
+ __u32 size_value;
+ __u32 max_elem;
+ __u32 flags;
+ __u32 id;
+ __u32 pinning;
+};
+
+struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = {
+ .type = BPF_MAP_TYPE_CGROUP_ARRAY,
+ .size_key = sizeof(uint32_t),
+ .size_value = sizeof(uint32_t),
+ .pinning = PIN_GLOBAL_NS,
+ .max_elem = 1,
+};
+
+SEC("filter")
+int handle_egress(struct __sk_buff *skb)
+{
+ void *data = (void *)(long)skb->data;
+ struct eth_hdr *eth = data;
+ struct ipv6hdr *ip6h = data + sizeof(*eth);
+ void *data_end = (void *)(long)skb->data_end;
+ char dont_care_msg[] = "dont care %04x %d\n";
+ char pass_msg[] = "pass\n";
+ char reject_msg[] = "reject\n";
+
+ /* single length check */
+ if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
+ return TC_ACT_OK;
+
+ if (eth->h_proto != htons(ETH_P_IPV6) ||
+ ip6h->nexthdr != IPPROTO_ICMPV6) {
+ bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg),
+ eth->h_proto, ip6h->nexthdr);
+ return TC_ACT_OK;
+ } else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
+ bpf_trace_printk(pass_msg, sizeof(pass_msg));
+ return TC_ACT_OK;
+ } else {
+ bpf_trace_printk(reject_msg, sizeof(reject_msg));
+ return TC_ACT_SHOT;
+ }
+}
+
+char _license[] SEC("license") = "GPL";